mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
18798 lines
570 KiB
JSON
18798 lines
570 KiB
JSON
{
|
|
"devices": [
|
|
{
|
|
"id": 0,
|
|
"name": "NVIDIA Quadro GV100",
|
|
"sm_version": 700,
|
|
"ptx_version": 700,
|
|
"sm_default_clock_rate": 1627000000,
|
|
"number_of_sms": 80,
|
|
"max_blocks_per_sm": 32,
|
|
"max_threads_per_sm": 2048,
|
|
"max_threads_per_block": 1024,
|
|
"registers_per_sm": 65536,
|
|
"registers_per_block": 65536,
|
|
"global_memory_size": 34078982144,
|
|
"global_memory_bus_peak_clock_rate": 850000000,
|
|
"global_memory_bus_width": 4096,
|
|
"global_memory_bus_bandwidth": 870400000000,
|
|
"l2_cache_size": 6291456,
|
|
"shared_memory_per_sm": 98304,
|
|
"shared_memory_per_block": 49152,
|
|
"ecc_state": false
|
|
},
|
|
{
|
|
"id": 1,
|
|
"name": "NVIDIA Quadro GP100",
|
|
"sm_version": 600,
|
|
"ptx_version": 600,
|
|
"sm_default_clock_rate": 1442500000,
|
|
"number_of_sms": 56,
|
|
"max_blocks_per_sm": 32,
|
|
"max_threads_per_sm": 2048,
|
|
"max_threads_per_block": 1024,
|
|
"registers_per_sm": 65536,
|
|
"registers_per_block": 65536,
|
|
"global_memory_size": 17069309952,
|
|
"global_memory_bus_peak_clock_rate": 715000000,
|
|
"global_memory_bus_width": 4096,
|
|
"global_memory_bus_bandwidth": 732160000000,
|
|
"l2_cache_size": 4194304,
|
|
"shared_memory_per_sm": 65536,
|
|
"shared_memory_per_block": 49152,
|
|
"ecc_state": false
|
|
}
|
|
],
|
|
"benchmarks": [
|
|
{
|
|
"index": 0,
|
|
"name": "simple",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": null,
|
|
"states": {
|
|
"Device=0": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": null,
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "486"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010095795164609047"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006114730449640358"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010034803637751827"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005535128658782786"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001001473929135854"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": null,
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "488"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010075532745901644"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005196761038903798"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010027413077530309"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003559489414701089"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010014738126565483"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 1,
|
|
"name": "single_float64_axis",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "0",
|
|
"description": "",
|
|
"value": 0.0
|
|
},
|
|
{
|
|
"input_string": "0.0001",
|
|
"description": "",
|
|
"value": 0.0001
|
|
},
|
|
{
|
|
"input_string": "0.0002",
|
|
"description": "",
|
|
"value": 0.0002
|
|
},
|
|
{
|
|
"input_string": "0.0003",
|
|
"description": "",
|
|
"value": 0.00030000000000000003
|
|
},
|
|
{
|
|
"input_string": "0.0004",
|
|
"description": "",
|
|
"value": 0.0004
|
|
},
|
|
{
|
|
"input_string": "0.0005",
|
|
"description": "",
|
|
"value": 0.0005
|
|
},
|
|
{
|
|
"input_string": "0.0006",
|
|
"description": "",
|
|
"value": 0.0006000000000000001
|
|
},
|
|
{
|
|
"input_string": "0.0007",
|
|
"description": "",
|
|
"value": 0.0007000000000000001
|
|
},
|
|
{
|
|
"input_string": "0.0008",
|
|
"description": "",
|
|
"value": 0.0008000000000000001
|
|
},
|
|
{
|
|
"input_string": "0.0009",
|
|
"description": "",
|
|
"value": 0.0009000000000000002
|
|
},
|
|
{
|
|
"input_string": "0.001",
|
|
"description": "",
|
|
"value": 0.0010000000000000002
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 Duration=0": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "14050"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "9.162447829181515e-06"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.03536831341378678"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "3.7685477789450405e-06"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.1242757507930245"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "1.6396544558213103e-06"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "305626"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0001": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "3833"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010862307644142961"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003939155614687134"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010305688671520844"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004761970040891668"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00010137620362095862"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "5088"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0002": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2173"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002089772070869765"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021045460135476644"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020339080587790604"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0024397165593270475"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020172840121363044"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2582"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0003": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.00030000000000000003"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1519"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00030826679394338436"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014114649766999914"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003027125938382783"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0016421113166888573"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003010571695496376"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1742"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0004": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0004"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1166"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004085880488850769"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010128960046765418"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004030490282469304"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0012417175292930155"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004014095938278854"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1304"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0005": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0005"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "945"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005090367534391529"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008945139151387666"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005034566609317042"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009551336090877046"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005017609577982818"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1044"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0006": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0006000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "796"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006083192776381908"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006965661259016556"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000602732541573109"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008277718240662281"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006010903174724053"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "872"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0007": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0007000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "686"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007086454693877553"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005852496456482882"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007030655056151287"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007134410597090336"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007014413925415692"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "748"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0008": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0008000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "603"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008089611791044773"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005396538572101352"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008033946325529826"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006279991294953942"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008017935563300363"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "654"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.0009": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0009000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "539"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009083643543599264"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00046818252995143266"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009027890649266406"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005476172787827472"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009011217884181701"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "582"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Duration=0.001": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0010000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "486"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010087072057613155"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041872799651378016"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010031157275776806"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004937944200398705"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001001475909284053"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "14964"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "8.23057591553059e-06"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.050523894758860086"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "3.353875434461126e-06"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.05725803858596207"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "1.355632943746511e-06"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "368832"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0001": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "3942"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000107132078640284"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0038634259631888097"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0001024897333569082"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003060119065396588"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0001013761587297066"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "5074"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0002": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2208"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002075046254528986"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001976178510512602"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020285036215099666"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0015468676288172283"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00020172864733863198"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2595"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0003": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.00030000000000000003"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1537"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003067784710474956"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0013859894180016342"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003021601552535132"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010334107287380953"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003010565582609204"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1737"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0004": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0004"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1176"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004071361462585035"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010022100665278571"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00040249948315068836"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007542402160340061"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004014089213396104"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1306"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0005": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0005"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "951"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005075497234490012"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007999383507906208"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005028805712670812"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005997416033360081"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005017610334757409"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1046"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0006": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0006000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "800"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006068307462500002"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000718570216952097"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006021880812197924"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005176533625753945"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006010893901462271"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "873"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0007": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0007000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "690"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007071831971014495"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005757596123903934"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007025585162466849"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00045240029617132243"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007014411477481618"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "748"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0008": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0008000000000000001"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "606"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008075822128712869"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0005708387835625503"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008029147460319033"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004053734463924786"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00080179443359375"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "655"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.0009": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0009000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "541"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009068751312384467"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004621968439524866"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009022267493875558"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003405832639595575"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009011221046513186"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "583"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 Duration=0.001": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Duration": {
|
|
"type": "float64",
|
|
"value": "0.0010000000000000002"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "488"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010072327028688517"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004212022611220011"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010025862904845687"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003195925477697865"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010014755598461355"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "524"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 2,
|
|
"name": "copy_sweep_grid_shape",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"flags": "pow2",
|
|
"values": [
|
|
{
|
|
"input_string": "6",
|
|
"description": "2^6 = 64",
|
|
"value": 64
|
|
},
|
|
{
|
|
"input_string": "8",
|
|
"description": "2^8 = 256",
|
|
"value": 256
|
|
},
|
|
{
|
|
"input_string": "10",
|
|
"description": "2^10 = 1024",
|
|
"value": 1024
|
|
}
|
|
]
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"flags": "pow2",
|
|
"values": [
|
|
{
|
|
"input_string": "6",
|
|
"description": "2^6 = 64",
|
|
"value": 64
|
|
},
|
|
{
|
|
"input_string": "8",
|
|
"description": "2^8 = 256",
|
|
"value": 256
|
|
},
|
|
{
|
|
"input_string": "10",
|
|
"description": "2^10 = 1024",
|
|
"value": 1024
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 BlockSize=2^6 NumBlocks=2^6": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "70"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007152438914285718"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.05103516144522029"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007146643659046716"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.05105065000806433"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "9390263066.362482"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "75122104530.89986"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.08630756494818458"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006479399461012621"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "78"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^8 NumBlocks=2^6": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "229"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002168227908296944"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0073699479168641575"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021626523655054347"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007425464144896749"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "31030814323.371826"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "248246514586.9746"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.28520969047216754"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002159405241287294"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "243"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^10 NumBlocks=2^6": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "448"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010965164419642858"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.012922365203073934"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010910109984023236"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013003332317485732"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "61510712631.013084"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "492085701048.1047"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.565355814623282"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010869085366833847"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "486"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^6 NumBlocks=2^8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "229"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002169319052401745"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003922215380349919"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021638464469576494"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003914908768819146"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "31013690502.09386"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "248109524016.7509"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.2850523024089509"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0021620516050990224"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "243"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^8 NumBlocks=2^8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "456"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001076449870614035"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011436755828819038"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010709658272956548"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.01147415502832018"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "62662003109.342606"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "501296024874.74084"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5759375285785165"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010696090290923384"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "487"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^10 NumBlocks=2^8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "500"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009796881099999996"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006630669601394768"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009742486392259615"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006702329573169841"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "68882686922.01393"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "551061495376.1115"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6331129312685104"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009725467921183118"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "542"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^6 NumBlocks=2^10": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "459"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010702333769063192"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009289316979889557"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010647220575212134"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009351871253339476"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "63029467198.450455"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "504235737587.60364"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5793149558681108"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010658674782853784"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "492"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^8 NumBlocks=2^10": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "500"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000979172994"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007443039949868979"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009736743034124385"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007443203952714375"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "68923318367.14127"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "551386546937.1301"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6334863820509308"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009717381278159657"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "541"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 BlockSize=2^10 NumBlocks=2^10": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "474"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010353058628691984"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.02225997387052118"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010297924077712025"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.022347468768318675"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "65167371106.61446"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "521338968852.9157"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5989648079652065"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010282588096414032"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "522"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^6 NumBlocks=2^6": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "76"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0066473009473684225"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010215778080601146"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006642639580525849"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001033363366599417"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "10102740512.482763"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "80821924099.8621"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.11038833601926096"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006642121520223497"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "79"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^8 NumBlocks=2^6": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "216"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0023017428981481495"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022391000845061416"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002297048738709203"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002236143378518806"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "29215254717.542892"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "233722037740.34314"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.3192226258472781"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002296809949372944"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "228"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^10 NumBlocks=2^6": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "418"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001179723488038277"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0038851033159110274"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001175016037870252"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0039137311385035334"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "57113147256.81244"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "456905178054.4995"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6240509971242618"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011733407974243164"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "448"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^6 NumBlocks=2^8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "224"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002222525227678572"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0013751517218031045"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002217807294002601"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0013709949704347455"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "30259105099.65222"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "242072840797.21777"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.33062833369375244"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002216307048556171"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "237"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^8 NumBlocks=2^8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "434"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011343795576036872"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0063161302839560935"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011296571429973376"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006343846096918854"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59406399911.69264"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "475251199293.54114"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6491083906434948"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011274932015425106"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "469"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^10 NumBlocks=2^8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "437"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011263629336384434"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002337926005235178"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011216670730294026"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002335621873443411"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59829574758.535194"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "478636598068.28156"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6537322416797989"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011199346014793882"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "470"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^6 NumBlocks=2^10": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "64"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "439"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011228420820045561"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0030427104293198376"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011181568284784197"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0030409709404223893"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "60017398535.51786"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "480139188284.1429"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6557845119702563"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011164653372257314"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "470"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^8 NumBlocks=2^10": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "256"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "440"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00112084285909091"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0025157975228794673"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011161849425597621"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0025072621927497768"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "60123427078.40005"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "480987416627.2004"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6569430406293711"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011137209100238348"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "472"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 BlockSize=2^10 NumBlocks=2^10": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"BlockSize": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
},
|
|
"NumBlocks": {
|
|
"type": "int64",
|
|
"value": "1024"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "464"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010599195581896552"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0019836197107494535"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010552384144273295"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0019838471234599063"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "63595925889.809"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "508767407118.472"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.69488555386592"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00105384634014122"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "499"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 3,
|
|
"name": "copy_type_sweep",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"T": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "U8",
|
|
"description": "uint8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U16",
|
|
"description": "uint16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U32",
|
|
"description": "uint32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U64",
|
|
"description": "uint64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F32",
|
|
"description": "float",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F64",
|
|
"description": "double",
|
|
"is_active": true
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 T=U8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "217"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022855767235023037"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00300372701685277"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00228007503588628"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0030435512411696388"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "117730974540.34332"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "235461949080.68665"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.27052154076365653"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0022790989087975544"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "230"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=U16": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "342"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014443213274853803"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005313412134692269"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014388333057102406"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0053179597407149795"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "93282333309.4497"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "373129333237.7988"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.4286871935176917"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001437488301595052"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "360"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=U32": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "456"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010777073771929832"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011572026564316875"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010722620346044238"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.011623749514354471"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "62586253951.21598"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "500690031609.72784"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5752413046986763"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001070086296237245"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "490"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=U64": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "514"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009532965797665363"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0051196185177249205"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009478394400748767"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005131328754237089"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "35400966219.92148"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "566415459518.7437"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6507530555132625"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009458808417792793"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "555"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=F32": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "456"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010760199342105259"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.010177745756624636"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010705234403150128"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.010199116668820488"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "62687897782.2779"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "501503182258.2232"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5761755310871131"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010701700846354166"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "489"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 T=F64": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "514"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009537802023346293"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00826653498170841"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009483462104769544"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008316637345502817"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "35382048907.14371"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "566112782514.2993"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6504053107930828"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0009457701526988636"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "550"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "184"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0027052529021739146"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003601477829562271"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0027005073000555477"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0036073295118906625"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "99401862751.66835"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "198803725503.3367"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.2715304380235696"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0027012594746802137"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "193"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U16": {
|
|
"device": 1,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "325"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0015229335907692304"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00461287264596262"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001518287454751822"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004612569935175065"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "88400735697.27223"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "353602942789.0889"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.48295856477967786"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001517369088409953"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "346"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U32": {
|
|
"device": 1,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "435"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011324326574712646"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006407601007533829"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011277568740406254"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006437993538809961"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59506499623.0584"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "476051996984.4672"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6502021374897116"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011264256719333023"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "469"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=U64": {
|
|
"device": 1,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "468"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010516201538461542"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002564258397948022"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010469369577546404"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0025731489764373736"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32050097908.439487"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512801566535.0318"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.700395496250863"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010448107472453933"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "502"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=F32": {
|
|
"device": 1,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "435"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.001132957098850574"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006385301490538342"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011282652709675928"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.006406051844181131"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "59479685962.90115"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "475837487703.2092"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6499091560631682"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011265583781452921"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "462"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 T=F64": {
|
|
"device": 1,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"T": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "468"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010518258760683764"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.002638851741610878"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010471444782028856"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0026422486584488855"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32043746300.974895"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512699940815.5983"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.7002566936401856"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0010447956953898514"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "505"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 4,
|
|
"name": "copy_type_conversion_sweep",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0,
|
|
1
|
|
],
|
|
"axes": {
|
|
"In": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "I8",
|
|
"description": "int8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I16",
|
|
"description": "int16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I32",
|
|
"description": "int32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F32",
|
|
"description": "float",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I64",
|
|
"description": "int64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F64",
|
|
"description": "double",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Out": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "I8",
|
|
"description": "int8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I16",
|
|
"description": "int16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I32",
|
|
"description": "int32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F32",
|
|
"description": "float",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I64",
|
|
"description": "int64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F64",
|
|
"description": "double",
|
|
"is_active": true
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 In=I8 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I8 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "775"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000624855941935483"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0026998034506879763"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006194506773641069"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0027411303205337333"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "108336089461.65392"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "325008268384.96173"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.37340104364081084"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006171294842507731"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "849"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "660"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007373176257575753"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0039374442120074255"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007318062056194664"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.003968882228293087"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "91703054011.67381"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "458515270058.369"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5267868451957365"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007297926682692308"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "715"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "655"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007424307770992365"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004224076490124651"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007369820818646267"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004224007291086274"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "91059017106.91382"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "455295085534.5691"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5230871846674737"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007352958009195734"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "706"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "407"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0012081452088452092"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009488803674295956"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0012026181030624918"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009557481496348873"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "55802306508.69623"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "502220758578.26605"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5769999524106917"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0012018776918068911"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "429"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I8 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "415"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011839576867469879"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007849889234045669"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011785369482385114"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007894267956792296"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "56942520215.682335"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512482681941.14105"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5887898459801713"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0011769149367873732"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "444"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I16 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I16 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1105"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00043164991312217215"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.010807224020750296"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004261522234295282"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.010967121645138193"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78738136645.08269"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "472428819870.49615"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.542772081652684"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004235287455769328"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1232"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1103"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004325297388939259"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008132056614414322"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004270972975631891"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008246119473642231"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78563906143.74146"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "471383436862.44867"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5415710441893942"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004253676273859287"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1241"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "733"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006613082878581173"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007945982231516395"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006558368572759433"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008081739278450547"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "51162772612.94874"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "511627726129.48737"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5878075897627383"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006543398455948913"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "794"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I16 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 11,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "734"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006602106062670296"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007402870806519008"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006547767629577938"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007440276584065623"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "51245605980.92404"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512456059809.2404"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.588759259891131"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006532440560455698"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "814"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I32 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 12,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I32 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 13,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I32 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 14,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I32 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 15,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1735"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002670209631123916"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013093461911533781"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002615653075986357"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013401071612529727"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "64141594900.43743"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "513132759203.49945"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5895367178349029"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00025968648747412565"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2023"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I32 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 16,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1233"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00038442103811841063"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00844365555913354"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000379033355382238"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008658982321334757"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44263165132.474785"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531157981589.6974"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6102458428190457"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037769805444141474"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1381"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I32 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 17,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1233"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003844277526358471"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009146640310310188"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037900421011283686"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009309598787035932"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44266568951.84647"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531198827422.15765"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6102927704758245"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003778626589998872"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1385"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F32 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 18,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F32 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 19,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F32 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 20,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1726"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026862367844727737"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013398984730737511"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002631768242198105"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.013647890011152018"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "63748835216.53615"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "509990681732.2892"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5859267942696337"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002611921188044063"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1917"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F32 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 21,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=F32 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 22,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1235"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003840306607287451"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009229816202298303"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037859300990336473"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009349966037483663"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44314648081.54371"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531775776978.5245"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6109556261242239"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037728447759194497"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1385"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F32 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 23,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1233"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0003844510843471213"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009332888492996015"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037903595060134466"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009455810939030309"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44262862067.25975"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "531154344807.117"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6102416645302355"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00037756963876577526"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1352"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=I64 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 24,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 25,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 26,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 27,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=I64 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 28,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=0 In=I64 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 29,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1863"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00024704845947396656"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008865365856503242"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00024161205912269328"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.009044628498958832"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "34719326636.507706"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "555509226184.1233"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.638222916112274"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00023959052166815616"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2168"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F64 Out=I8": {
|
|
"device": 0,
|
|
"type_config_index": 30,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=I16": {
|
|
"device": 0,
|
|
"type_config_index": 31,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=I32": {
|
|
"device": 0,
|
|
"type_config_index": 32,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=F32": {
|
|
"device": 0,
|
|
"type_config_index": 33,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=0 In=F64 Out=I64": {
|
|
"device": 0,
|
|
"type_config_index": 34,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1863"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002471723081052067"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008513791485233733"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00024173997316990882"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.008792904583053216"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "34700955286.79488"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "555215284588.7181"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6378852074778472"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002398694754203643"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2141"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 In=F64 Out=F64": {
|
|
"device": 0,
|
|
"type_config_index": 35,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I8 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I8 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "704"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006917278508522718"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.031528564248938934"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0006868111818859521"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.03156956554686085"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "97710791218.80649"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "293132373656.4195"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.4003665505578282"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000659236081199501"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "789"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "568"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008635000933098584"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007603813185561577"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008587616908718169"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0076405577469716105"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78146084895.6489"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "390730424478.2445"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5336680841322177"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008576233512476871"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "608"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "568"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008623903292253519"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007683250202065139"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008576831001211219"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0077139887360741476"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "78244358540.49461"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "391221792702.473"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.5343392055049074"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0008563666250191483"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "612"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "339"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014575299587020652"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005325090452654586"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014528400236878067"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005339082380112657"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "46191502784.76956"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "415723525062.9261"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.56780420271925"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00145044431581602"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "364"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I8 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "536870912"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "339"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014595411091445434"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00536065113916752"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0014548183609250722"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005369023538480178"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "46128689190.67507"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "415158202716.0756"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.567032073202682"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00145255855984158"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "360"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I16 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I16 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1042"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00046152892994241876"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007446740614945881"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00045683037259413987"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007515900606668647"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "73450527839.16064"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "440703167034.96387"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6019219392413733"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004549347768605374"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1156"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1046"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004599197934990448"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007608321516935087"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00045521636728916755"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.007662230748478094"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "73710952441.8422"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "442265714651.0532"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.604056100648838"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004532274742649026"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1168"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "648"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007539361157407405"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005745552244274178"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007492513590388824"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005767470540109363"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44783945461.29704"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "447839454612.97046"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6116688355181524"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007466272232380319"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "705"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I16 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 11,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "33554432"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "268435456"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "650"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007517909569230775"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0052243182117119895"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007470858345581929"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005225121011834867"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "44913757493.26477"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "449137574932.64764"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6134418363918374"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0007439345558090966"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "707"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I32 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 12,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I32 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 13,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I32 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 14,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I32 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 15,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1687"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002777349045643155"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005691592315009916"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.000273079350458212"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.005675718906016491"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "61437146279.45599"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "491497170235.64795"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6712974899416083"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002715061958589702"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1930"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I32 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 16,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1133"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004230005507502205"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004752057993974487"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041831812217818477"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004789005696384007"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40106357125.149025"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "481276285501.78827"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6573375840004757"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041601362464715726"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1251"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I32 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 17,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1132"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004233390768551238"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004579872590098746"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041865752666346193"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004575612011474384"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40073843013.66299"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "480886116163.9559"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6568046822606478"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004163219633556548"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1260"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F32 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 18,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F32 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 19,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F32 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 20,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1665"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00028157421321321324"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.01259188984622349"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002768973456309726"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0128083650869227"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "60590021048.303505"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "484720168386.42804"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6620413139019177"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002751490314863719"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1917"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F32 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 21,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=F32 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 22,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1133"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004230867334510152"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004737837167796919"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004184002545904713"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0047584325554732645"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40098484204.84705"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "481181810458.1647"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6572085479378342"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004160796998517786"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1265"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F32 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 23,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "16777216"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "134217728"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1132"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0004234168127208481"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004661976745113187"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041872537112383403"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0046895661377312735"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "40067350003.108116"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "480808200037.2974"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6566982627257668"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00041628507170249205"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1259"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=I64 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 24,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 25,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 26,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 27,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=I64 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 28,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
},
|
|
"Device=1 In=I64 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 29,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1753"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002666010844266969"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004049936615976281"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026192253768546044"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00396300201275967"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32027056831.87056"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512432909309.92896"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6998919762209476"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026010225147830515"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1995"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F64 Out=I8": {
|
|
"device": 1,
|
|
"type_config_index": 30,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=I16": {
|
|
"device": 1,
|
|
"type_config_index": 31,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=I32": {
|
|
"device": 1,
|
|
"type_config_index": 32,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=F32": {
|
|
"device": 1,
|
|
"type_config_index": 33,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
|
|
},
|
|
"Device=1 In=F64 Out=I64": {
|
|
"device": 1,
|
|
"type_config_index": 34,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Element count: Items": {
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Items"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "8388608"
|
|
}
|
|
},
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "InSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Output Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "OutSize"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "67108864"
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "1753"
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.00026658457387335985"
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004159876144452023"
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002619018185261111"
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.004108154173032601"
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "32029590505.3583"
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "512473448085.7328"
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.6999473449597531"
|
|
}
|
|
},
|
|
"Average GPU Time (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch GPU"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average back-to-back kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": "0.0002600505164606654"
|
|
}
|
|
},
|
|
"Number of Samples (Batch)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Batch"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in hot time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": "2010"
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=1 In=F64 Out=F64": {
|
|
"device": 1,
|
|
"type_config_index": 35,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"In": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Out": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Not a conversion: InputType == OutputType."
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|