mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
18798 lines
570 KiB
JSON
18798 lines
570 KiB
JSON
{
|
|
"devices": [
|
|
{
|
|
"id": 0,
|
|
"name": "NVIDIA Quadro GV100",
|
|
"sm_version": 700,
|
|
"ptx_version": 700,
|
|
"sm_default_clock_rate": 1627000000,
|
|
"number_of_sms": 80,
|
|
"max_blocks_per_sm": 32,
|
|
"max_threads_per_sm": 2048,
|
|
"max_threads_per_block": 1024,
|
|
"registers_per_sm": 65536,
|
|
"registers_per_block": 65536,
|
|
"global_memory_size": 34078982144,
|
|
"global_memory_bus_peak_clock_rate": 850000000,
|
|
"global_memory_bus_width": 4096,
|
|
"global_memory_bus_bandwidth": 870400000000,
|
|
"l2_cache_size": 6291456,
|
|
"shared_memory_per_sm": 98304,
|
|
"shared_memory_per_block": 49152,
|
|
"ecc_state": false
|
|
},
|
|
{
|
|
"id": 1,
|
|
"name": "NVIDIA Quadro GP100",
|
|
"sm_version": 600,
|
|
"ptx_version": 600,
|
|
"sm_default_clock_rate": 1442500000,
|
|
"number_of_sms": 56,
|
|
"max_blocks_per_sm": 32,
|
|
"max_threads_per_sm": 2048,
|
|
"max_threads_per_block": 1024,
|
|
"registers_per_sm": 65536,
|
|
"registers_per_block": 65536,
|
|
"global_memory_size": 17069309952,
|
|
"global_memory_bus_peak_clock_rate": 715000000,
|
|
"global_memory_bus_width": 4096,
|
|
"global_memory_bus_bandwidth": 732160000000,
|
|
"l2_cache_size": 4194304,
|
|
"shared_memory_per_sm": 65536,
|
|
"shared_memory_per_block": 49152,
|
|
"ecc_state": false
|
|
}
|
|
],
|
|
"benchmarks": [
|
|
{
|
|
"index": 0,
|
|
"name": "simple",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": null,
|
|
"states": {
|
|
"Device=0": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": null,
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "486"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010094132736625523"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005987183296179167"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010034002306039446"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005072701393681687"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001001473929135854"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": null,
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "488"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010074898913934418"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005542305355933818"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010027081287298028"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00035037919649082367"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010014748609703007"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 1,
|
|
"name": "single_float64_axis",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "0",
|
|
"description": "",
|
|
"value": 0.0
|
|
},
|
|
{
|
|
"input_string": "0.0001",
|
|
"description": "",
|
|
"value": 0.0001
|
|
},
|
|
{
|
|
"input_string": "0.0002",
|
|
"description": "",
|
|
"value": 0.0002
|
|
},
|
|
{
|
|
"input_string": "0.0003",
|
|
"description": "",
|
|
"value": 0.00030000000000000003
|
|
},
|
|
{
|
|
"input_string": "0.0004",
|
|
"description": "",
|
|
"value": 0.0004
|
|
},
|
|
{
|
|
"input_string": "0.0005",
|
|
"description": "",
|
|
"value": 0.0005
|
|
},
|
|
{
|
|
"input_string": "0.0006",
|
|
"description": "",
|
|
"value": 0.0006000000000000001
|
|
},
|
|
{
|
|
"input_string": "0.0007",
|
|
"description": "",
|
|
"value": 0.0007000000000000001
|
|
},
|
|
{
|
|
"input_string": "0.0008",
|
|
"description": "",
|
|
"value": 0.0008000000000000001
|
|
},
|
|
{
|
|
"input_string": "0.0009",
|
|
"description": "",
|
|
"value": 0.0009000000000000002
|
|
},
|
|
{
|
|
"input_string": "0.001",
|
|
"description": "",
|
|
"value": 0.0010000000000000002
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 Duration=0": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "14061"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "9.102689638005845e-06"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.033946388108068055"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "3.7547417902904438e-06"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.12549022159970946"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "1.630773172830879e-06"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "306655"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0001": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "3835"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010860168552803123"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004007949999262656"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010303751935470811"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004789691009751296"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010137620362095862"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "5088"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0002": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2174"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020898149126034966"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002070700973146156"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020338884861017417"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002418204625044133"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002017283984223771"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2583"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0003": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.00030000000000000003"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1520"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00030825112500000015"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014009307905580174"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00030272901975793895"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0016163896900565434"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003010571695496376"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1742"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0004": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0004"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1166"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004085718481989706"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010690404823574895"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00040307120334734023"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0012226190019077351"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004014095938278854"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1304"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0005": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0005"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "945"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005089798201058188"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008530028319072816"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005034217145707861"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009752402596440034"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005017619516657686"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1044"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0006": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0006000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "796"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006082355979899511"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007134353357638104"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006027260286424639"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008279817736951732"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006010891975612815"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "872"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0007": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0007000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "685"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007086865854014601"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006295331091145095"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007030571342384726"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007151653876403053"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007014426981064088"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "748"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0008": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0008000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "602"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008090872425249167"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005562631850494214"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008034305715085621"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006219681072125149"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008017951428707951"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "654"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0009": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0009000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "538"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009084568382899636"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005031047519089767"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009027937730448745"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005501738587938111"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009011217884181701"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "582"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.001": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0010000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "487"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010086481827515403"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004266615566594544"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010031193825253714"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004975122529595318"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001001475909284053"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "15089"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "8.108349592418312e-06"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.05461449121054022"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "3.271210544150035e-06"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.059765735669007766"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "1.3421442998656208e-06"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "372558"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0001": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "3944"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010710262145030443"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004128650771669589"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010247565930403145"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0030818570098060543"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010137613820964433"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "5117"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0002": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2193"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020765215686274505"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021648763590408093"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020284258628946086"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0015447061481155045"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002017285137353667"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2584"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0003": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.00030000000000000003"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1537"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003068213201040992"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0013512096196898148"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00030219575751114794"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010543163243715088"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003010567871656286"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1736"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0004": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0004"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1176"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00040721289880952437"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010016437258221326"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004025331704186726"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007456691947680211"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004014084236753499"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1304"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0005": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0005"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "951"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005075412103049417"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000846863074833117"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005028813449366248"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006292766848433991"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005017613753177333"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1045"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0006": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0006000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "800"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006068351487499997"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006561812659454387"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006021752006560568"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000511717182892197"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006010895299747637"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "873"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0007": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0007000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "690"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007071279246376804"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005386426703062701"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007025530446266783"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00042821786377290075"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007014415557371741"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "748"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0008": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0008000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "605"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008076996363636364"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006014433173443102"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008029008409208492"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00036509958633429017"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008017936496559632"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "654"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0009": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0009000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "540"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009070510574074071"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00046472458647248545"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009022252441556363"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00033895812399517745"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009011227322607926"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "582"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.001": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0010000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "488"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010073550901639342"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004238073408932392"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010025966528986322"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003136332645329908"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001001473929135854"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 2,
|
|
"name": "copy_sweep_grid_shape",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"flags": "pow2",
|
|
"values": [
|
|
{
|
|
"input_string": "6",
|
|
"description": "2^6 = 64",
|
|
"value": 64
|
|
},
|
|
{
|
|
"input_string": "8",
|
|
"description": "2^8 = 256",
|
|
"value": 256
|
|
},
|
|
{
|
|
"input_string": "10",
|
|
"description": "2^10 = 1024",
|
|
"value": 1024
|
|
}
|
|
]
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"flags": "pow2",
|
|
"values": [
|
|
{
|
|
"input_string": "6",
|
|
"description": "2^6 = 64",
|
|
"value": 64
|
|
},
|
|
{
|
|
"input_string": "8",
|
|
"description": "2^8 = 256",
|
|
"value": 256
|
|
},
|
|
{
|
|
"input_string": "10",
|
|
"description": "2^10 = 1024",
|
|
"value": 1024
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 BlockSize=2^6 NumBlocks=2^6": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "71"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007065658352112677"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.05358128799632556"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007059958081849862"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.053589324741995806"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "9505561254.326319"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "76044490034.61055"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.08736729094049925"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006475561071325232"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "81"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^8 NumBlocks=2^6": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "229"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021687765283842793"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006699637202043051"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021633964730141996"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00669331351204079"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "31020141170.19388"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "248161129361.55103"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.28511159163781136"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002160161503025743"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "244"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^10 NumBlocks=2^6": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "448"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010963011227678571"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013516109455086892"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001090899714667882"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013565950821979889"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "61516987398.26961"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "492135899186.15686"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5654134871164486"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010871857387360318"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "481"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^6 NumBlocks=2^8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "229"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002169116519650655"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003880325099879575"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021636720515755057"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0038733421374846436"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "31016190254.495274"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "248129522035.9622"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.2850752780744051"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021606314702289093"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "243"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^8 NumBlocks=2^8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "456"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010761263311403508"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011961974879208899"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001070721754902288"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.012050980053815875"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "62676286993.08928"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "501410295944.71423"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5760688142747177"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010696770163143381"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "493"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^10 NumBlocks=2^8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "500"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000980373466000001"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005650663121151804"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009750024316310896"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005680157515531913"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "68829432443.29456"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "550635459546.3564"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6326234599567514"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009731230225510264"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "542"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^6 NumBlocks=2^10": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "459"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010701848496732027"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008617668166839768"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010647455503218568"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008515610201608317"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "63028076501.20161"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "504224612009.61285"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5793021737242795"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010672177234327936"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "498"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^8 NumBlocks=2^10": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "500"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000979696614"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0074291976714003565"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009743501433134098"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0074775515242700395"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "68875510986.00674"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "551004087888.054"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6330469759743267"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009717721991970888"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "541"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^10 NumBlocks=2^10": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "475"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010337088463157895"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.021637984186463816"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010282407758110449"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.02173209936637211"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "65265709723.54853"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "522125677788.38824"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.599868655547321"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010291563019039125"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "508"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^6 NumBlocks=2^6": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "76"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006647754513157893"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011430629751785044"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006643085875009235"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011389249175732911"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "10102061792.16473"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "80816494337.31784"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.11038091993186987"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006639652300484573"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "79"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^8 NumBlocks=2^6": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "216"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002300918597222223"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022260554559899452"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022963019234162794"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022413389898784455"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "29224756255.11826"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "233798050040.94608"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.3193264450952607"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022975726211280152"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "228"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^10 NumBlocks=2^6": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "418"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011795720191387577"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0035334409960244696"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001174919423874485"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0035420884521558988"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "57117843688.972115"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "456942749511.7769"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6241023130350974"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011729015622820172"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "448"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^6 NumBlocks=2^8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "224"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022223120000000006"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014441799301084402"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00221759328778301"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001434325968668793"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "30262025218.83109"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "242096201750.6487"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.3306602405903747"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002216961359573623"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "236"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^8 NumBlocks=2^8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "435"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011336455977011492"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006534400600481561"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001129045183631195"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0065638034102788135"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59438599068.433075"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "475508792547.4646"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6494602170938929"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011272204485062364"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "466"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^10 NumBlocks=2^8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "437"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011265385652173912"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002220966435104119"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011218978122933775"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022003475082832675"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59817269687.70571"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "478538157501.6457"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6535977894198614"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001119863004765959"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "468"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^6 NumBlocks=2^10": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "439"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011232369088838266"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00285184985884414"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011185731920403065"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0028360480110887457"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59995058416.86738"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "479960467334.939"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6555404110234635"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011161975045489451"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "468"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^8 NumBlocks=2^10": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "440"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011206702840909095"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002536479032620614"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011160453837026254"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0025536971451898373"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "60130945371.914566"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "481047562975.3165"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6570251898155001"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011138856279089096"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "470"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^10 NumBlocks=2^10": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "464"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010597870474137931"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0020209648798997564"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010551077248207455"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002011195776784625"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "63603803120.10441"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "508830424960.83527"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6949716250011408"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010536742918941392"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "498"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 3,
|
|
"name": "copy_type_sweep",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"T": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "U8",
|
|
"description": "uint8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U16",
|
|
"description": "uint16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U32",
|
|
"description": "uint32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U64",
|
|
"description": "uint64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F32",
|
|
"description": "float",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F64",
|
|
"description": "double",
|
|
"is_active": true
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 T=U8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "217"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002284935774193548"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003019023225421965"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022794654072704396"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0030185067855524154"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "117762460945.3669"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "235524921890.7338"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.2705938900399056"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022792820785984846"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "231"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=U16": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "341"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014459254017595295"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005620271181121053"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014404413371491634"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005659383776137258"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "93178197916.5051"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "372712791666.0204"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.4282086301309977"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014370339589576198"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "365"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=U32": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "456"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010763392214912279"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009580925422442722"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010708663173412028"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009602261983780735"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "62667825958.53892"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "501342607668.31134"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5759910474130415"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010690977880559816"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "489"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=U64": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "514"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009534325642023344"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007974682202520992"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009479809484593146"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008022855237026269"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "35395681795.64538"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "566330908730.326"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6506559153611283"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009457213474094653"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "554"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=F32": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "456"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010769479144736836"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011261863999383217"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001071445541946512"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011287071608158339"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "62633947664.836296"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "501071581318.69037"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5756796660370983"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001069358981385523"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "490"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=F64": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "514"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009534943599221791"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006006780711077088"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009480226613900089"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00602313677626831"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "35394124388.125755"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "566305990210.0121"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6506272865464293"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009457029259723165"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "552"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "184"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00270240325"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0033226300614619185"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002697714079981265"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0033217171224860604"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "99504783695.18842"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "199009567390.37683"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.2718115813351956"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0026982716095753207"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "195"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U16": {
|
|
"device": 1,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "325"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0015216281538461547"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0046556036312148845"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0015169812690294725"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004682337277211795"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "88476852509.76712"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "353907410039.0685"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.4833744127500389"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0015158526066057275"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "347"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U32": {
|
|
"device": 1,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "435"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011331533540229887"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006418753103730108"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011284679349811587"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0064621372230947265"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59469003876.588196"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "475752031012.70557"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6497924374627206"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011265910963430138"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "467"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U64": {
|
|
"device": 1,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "468"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010515641474358975"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002762541639974713"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001046885606570122"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002764528097772722"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32051670009.99595"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512826720159.9352"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.7004298516170443"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001044835600653889"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "503"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=F32": {
|
|
"device": 1,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "435"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011328659609195397"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006308260028809877"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011281658846756504"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006329740046854081"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59484925853.163795"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "475879406825.31036"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6499664101088701"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011261699270694815"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "470"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=F64": {
|
|
"device": 1,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "468"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010518281880341881"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002638709647720786"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010471613009770718"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0026399350413532966"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32043231514.27718"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512691704228.4349"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.7002454439308824"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010447449703140563"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "502"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 4,
|
|
"name": "copy_type_conversion_sweep",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"In": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "I8",
|
|
"description": "int8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I16",
|
|
"description": "int16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I32",
|
|
"description": "int32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F32",
|
|
"description": "float",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I64",
|
|
"description": "int64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F64",
|
|
"description": "double",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Out": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "I8",
|
|
"description": "int8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I16",
|
|
"description": "int16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I32",
|
|
"description": "int32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F32",
|
|
"description": "float",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I64",
|
|
"description": "int64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F64",
|
|
"description": "double",
|
|
"is_active": true
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 In=I8 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I8 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "775"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006248230980645156"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0027640779893251216"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006193935315070645"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0028186397219177456"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "108346084655.93024"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "325038253967.7907"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.37343549398873016"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006171660299862132"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "850"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "660"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007372658136363634"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004348049843468552"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007317814296845251"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004351029775591727"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "91706158803.36154"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "458530794016.8077"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5268046806259279"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007299521218782687"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "717"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "656"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000742387521341463"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0041525675601748364"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007369443420775064"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004193469264853706"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "91063680346.35373"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "455318401731.7686"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5231139725778592"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007352807822347689"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "714"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "407"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0012095483882063889"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009732185124544102"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001204128551248836"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009798212399727946"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "55732308589.8092"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "501590777308.2828"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.576276168782494"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0012017273091491842"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "429"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "415"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011847366168674703"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011261383409993239"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011792877487389432"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011302242538631406"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "56906267424.351715"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512156406819.16547"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5884149894521662"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011767830588600852"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "440"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I16 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I16 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1105"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00043142517375565617"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.01116818587784149"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00042600826737028365"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011332580467569093"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78764743715.25449"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "472588462291.5269"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5429554943606697"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00042359266142467694"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1238"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1102"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00043289838384754937"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008465395678081931"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00042745939692221985"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008617999240612035"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78497354933.81969"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "470984129602.9181"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5411122812533525"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00042536910129233627"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1229"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "734"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006609588569482289"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007896476276327823"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006555628124472239"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007981909890800989"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "51184160179.466095"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "511841601794.66095"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5880533108854101"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006538430490801411"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "806"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 11,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "734"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006605395899182562"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007740408518735753"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006550883051485072"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007833851008491804"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "51221234963.72489"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512212349637.2489"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5884792619913246"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006534532250824923"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "805"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I32 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 12,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I32 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 13,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I32 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 14,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I32 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 15,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1735"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026702492853025945"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.01324576727299336"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026161364844278195"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013450268523907918"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "64129742847.37816"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "513037942779.02527"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5894277835236963"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00025957003988639885"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2015"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I32 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 16,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1234"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003841953128038892"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008873245446388355"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003788044850192556"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008932234099031263"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44289908550.44172"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531478902605.3006"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6106145480299869"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037766468619885956"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1381"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I32 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 17,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1235"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003840312064777327"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009389520289783196"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037863498520754796"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009545097161422792"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44309735379.58624"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531716824555.03485"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6108878958582662"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003773968978051128"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1396"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F32 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 18,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F32 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 19,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F32 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 20,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1726"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026856249884125153"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.01342456387766187"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026315643022814674"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013724796519135959"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "63753775598.24316"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "510030204785.94525"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5859722021897349"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002609094005709575"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2047"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F32 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 21,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=F32 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 22,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1235"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003840352834008098"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009209302867708775"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037863381922486526"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009434239106344595"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44309871829.05669"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531718461948.68024"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6108897770550095"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037729541193829834"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1370"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F32 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 23,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1233"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003844534225466336"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009387088977698597"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037907109053659035"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009568452852068391"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44258758894.67376"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531105106736.0851"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6101850950552448"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003776787067281789"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1392"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I64 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 24,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 25,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 26,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 27,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 28,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I64 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 29,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1865"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002468652632707771"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008794568336063534"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002414397094508553"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009088437943671243"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "34744110730.913086"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "555905771694.6094"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6386785060829612"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00023926271107803853"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2168"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F64 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 30,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 31,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 32,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 33,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 34,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1861"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002474318479312196"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009416123268532244"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00024199313163148308"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009609928378243537"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "34664653262.864136"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "554634452205.8262"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6372178908614731"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00024011272523290366"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2177"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F64 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 35,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I8 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I8 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "715"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006812909104895107"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.029682520209047932"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006765060471488043"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.029725089166496972"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "99199207875.28265"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "297597623625.84796"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.4064652857651988"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000659287437142213"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "797"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "566"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008641483356890464"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00815440605473416"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008593197461783684"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008178118032486274"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78095335640.14047"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "390476678200.70233"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5333215119655572"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008574365556141886"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "607"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "568"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008621727816901408"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008348927642653206"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000857566987334842"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008407666935430734"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78254952663.88672"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "391274763319.4336"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5344115539218662"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008559337940091401"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "612"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "339"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014581254159292036"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005934832249204677"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001453499562620765"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005963799027107206"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "46170542961.153595"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "415534886650.3824"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5675465562860337"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014501432381838642"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "361"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "339"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014608549616519177"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005454444454530878"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014561624537527042"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0054738241927221685"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "46086110671.96002"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "414774996047.64026"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5665086812276555"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014524769206623453"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "364"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I16 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I16 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1042"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00046152389539347375"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007516961198942111"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004568425950928514"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0075614567935713"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "73448562722.52853"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "440691376335.17114"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6019058352479938"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00045486935942230756"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1156"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1047"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00045967601432664773"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007580415029008197"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00045502618507418957"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007602410404504316"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "73741760585.77625"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "442450563514.6575"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6043085712339618"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004530724069916505"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1164"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "648"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007539600570987655"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005701338376763893"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000749293333218421"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005730659247124155"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44781436738.365845"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "447814367383.65845"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6116345708365091"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007462590063859665"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "701"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 11,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "650"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007515365646153841"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005320261152122883"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007468673968315132"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00533121216008688"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44926893505.259796"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "449268935052.59796"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6136212508913325"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007440289011028757"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "702"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I32 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 12,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I32 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 13,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I32 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 14,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I32 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 15,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1688"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00027765218187203764"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005690620491369388"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00027302053109941316"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005713997774637474"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "61450382256.75059"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "491603058054.0047"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6714421138193901"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00027140032503120137"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1928"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I32 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 16,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1134"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000422905379188712"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004818481737573335"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004182333121013812"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004829428135064118"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40114489961.844894"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "481373879542.13873"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6574708800564614"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004160488643510754"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1267"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I32 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 17,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1132"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004233320008833917"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004685003714910728"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041865130761381596"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004676709118042214"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40074438309.11453"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "480893259709.37445"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6568144390698405"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041636213471617884"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1264"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F32 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 18,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F32 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 19,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F32 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 20,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1665"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002817099831831833"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.012603278274487326"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002770048382224978"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0127786417628205"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "60566508901.63906"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "484532071213.1125"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6617844067049723"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002751834324989535"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1941"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F32 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 21,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=F32 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 22,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1133"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004230943777581643"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004719817832949844"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041844157444515244"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004751688895767683"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40094524599.393234"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "481134295192.7188"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.657143650558237"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004160357588015425"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1252"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F32 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 23,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1132"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00042342536395759757"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004748798224952708"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041871643782504436"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004750041166889743"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40068204838.45002"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "480818458061.40027"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6567122733574632"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041632065453087554"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1252"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I64 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 24,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 25,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 26,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 27,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 28,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I64 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 29,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1753"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002666450433542495"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004046628770937376"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026198611749762206"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004009600477982423"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32019284380.88381"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512308550094.1409"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6997221237081251"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026007244216493403"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2008"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F64 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 30,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 31,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 32,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 33,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 34,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1753"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026657142213348556"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004288873685096382"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002619141041552483"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00422427515777647"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32028088090.39048"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512449409446.2477"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6999145124648269"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026007216520352087"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2013"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F64 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 35,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|