Files
nvbench/scripts/test_cmp.json

18798 lines
570 KiB
JSON

{
"devices": [
{
"id": 0,
"name": "NVIDIA Quadro GV100",
"sm_version": 700,
"ptx_version": 700,
"sm_default_clock_rate": 1627000000,
"number_of_sms": 80,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 34078982144,
"global_memory_bus_peak_clock_rate": 850000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 870400000000,
"l2_cache_size": 6291456,
"shared_memory_per_sm": 98304,
"shared_memory_per_block": 49152,
"ecc_state": false
},
{
"id": 1,
"name": "NVIDIA Quadro GP100",
"sm_version": 600,
"ptx_version": 600,
"sm_default_clock_rate": 1442500000,
"number_of_sms": 56,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 17069309952,
"global_memory_bus_peak_clock_rate": 715000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 732160000000,
"l2_cache_size": 4194304,
"shared_memory_per_sm": 65536,
"shared_memory_per_block": 49152,
"ecc_state": false
}
],
"benchmarks": [
{
"index": 0,
"name": "simple",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": null,
"states": {
"Device=0": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": null,
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "486"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010095795164609047"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006114730449640358"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010034803637751827"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005535128658782786"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001001473929135854"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
},
"Device=1": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": null,
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "488"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010075532745901644"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005196761038903798"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010027413077530309"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0003559489414701089"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010014738126565483"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
}
}
},
{
"index": 1,
"name": "single_float64_axis",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"Duration": {
"type": "float64",
"flags": "",
"values": [
{
"input_string": "0",
"description": "",
"value": 0.0
},
{
"input_string": "0.0001",
"description": "",
"value": 0.0001
},
{
"input_string": "0.0002",
"description": "",
"value": 0.0002
},
{
"input_string": "0.0003",
"description": "",
"value": 0.00030000000000000003
},
{
"input_string": "0.0004",
"description": "",
"value": 0.0004
},
{
"input_string": "0.0005",
"description": "",
"value": 0.0005
},
{
"input_string": "0.0006",
"description": "",
"value": 0.0006000000000000001
},
{
"input_string": "0.0007",
"description": "",
"value": 0.0007000000000000001
},
{
"input_string": "0.0008",
"description": "",
"value": 0.0008000000000000001
},
{
"input_string": "0.0009",
"description": "",
"value": 0.0009000000000000002
},
{
"input_string": "0.001",
"description": "",
"value": 0.0010000000000000002
}
]
}
},
"states": {
"Device=0 Duration=0": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "14050"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "9.162447829181515e-06"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.03536831341378678"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "3.7685477789450405e-06"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.1242757507930245"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "1.6396544558213103e-06"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "305626"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0001": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "3833"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00010862307644142961"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003939155614687134"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00010305688671520844"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004761970040891668"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00010137620362095862"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "5088"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0002": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "2173"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002089772070869765"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0021045460135476644"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00020339080587790604"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0024397165593270475"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00020172840121363044"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2582"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0003": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.00030000000000000003"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1519"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00030826679394338436"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0014114649766999914"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003027125938382783"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0016421113166888573"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003010571695496376"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1742"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0004": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0004"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1166"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004085880488850769"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0010128960046765418"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004030490282469304"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0012417175292930155"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004014095938278854"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1304"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0005": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0005"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "945"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0005090367534391529"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0008945139151387666"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005034566609317042"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0009551336090877046"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005017609577982818"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1044"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0006": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0006000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "796"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006083192776381908"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006965661259016556"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.000602732541573109"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0008277718240662281"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006010903174724053"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "872"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0007": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0007000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "686"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007086454693877553"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005852496456482882"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007030655056151287"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0007134410597090336"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007014413925415692"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "748"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0008": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0008000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "603"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008089611791044773"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005396538572101352"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008033946325529826"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006279991294953942"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008017935563300363"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "654"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0009": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0009000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "539"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009083643543599264"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00046818252995143266"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009027890649266406"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005476172787827472"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009011217884181701"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "582"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.001": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0010000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "486"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010087072057613155"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00041872799651378016"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010031157275776806"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0004937944200398705"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001001475909284053"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "14964"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "8.23057591553059e-06"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.050523894758860086"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "3.353875434461126e-06"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.05725803858596207"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "1.355632943746511e-06"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "368832"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0001": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "3942"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.000107132078640284"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0038634259631888097"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0001024897333569082"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003060119065396588"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0001013761587297066"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "5074"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0002": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "2208"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002075046254528986"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.001976178510512602"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00020285036215099666"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0015468676288172283"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00020172864733863198"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2595"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0003": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.00030000000000000003"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1537"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003067784710474956"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0013859894180016342"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003021601552535132"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0010334107287380953"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003010565582609204"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1737"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0004": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0004"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1176"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004071361462585035"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0010022100665278571"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00040249948315068836"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0007542402160340061"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004014089213396104"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1306"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0005": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0005"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "951"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0005075497234490012"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0007999383507906208"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005028805712670812"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005997416033360081"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005017610334757409"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1046"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0006": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0006000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "800"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006068307462500002"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.000718570216952097"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006021880812197924"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005176533625753945"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006010893901462271"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "873"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0007": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0007000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "690"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007071831971014495"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005757596123903934"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007025585162466849"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00045240029617132243"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007014411477481618"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "748"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0008": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0008000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "606"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008075822128712869"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005708387835625503"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008029147460319033"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0004053734463924786"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00080179443359375"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "655"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0009": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0009000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "541"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009068751312384467"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0004621968439524866"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009022267493875558"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0003405832639595575"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009011221046513186"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "583"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.001": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0010000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "488"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010072327028688517"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0004212022611220011"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010025862904845687"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0003195925477697865"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010014755598461355"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
}
}
},
{
"index": 2,
"name": "copy_sweep_grid_shape",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"BlockSize": {
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
},
"NumBlocks": {
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
}
},
"states": {
"Device=0 BlockSize=2^6 NumBlocks=2^6": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "70"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.007152438914285718"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.05103516144522029"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.007146643659046716"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.05105065000806433"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "9390263066.362482"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "75122104530.89986"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.08630756494818458"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.006479399461012621"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "78"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^8 NumBlocks=2^6": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "229"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.002168227908296944"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0073699479168641575"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0021626523655054347"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007425464144896749"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "31030814323.371826"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "248246514586.9746"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.28520969047216754"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002159405241287294"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "243"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^10 NumBlocks=2^6": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "448"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010965164419642858"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.012922365203073934"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010910109984023236"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013003332317485732"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "61510712631.013084"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "492085701048.1047"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.565355814623282"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010869085366833847"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "486"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^6 NumBlocks=2^8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "229"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.002169319052401745"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003922215380349919"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0021638464469576494"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003914908768819146"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "31013690502.09386"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "248109524016.7509"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.2850523024089509"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0021620516050990224"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "243"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^8 NumBlocks=2^8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "456"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.001076449870614035"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011436755828819038"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010709658272956548"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.01147415502832018"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "62662003109.342606"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "501296024874.74084"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5759375285785165"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010696090290923384"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "487"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^10 NumBlocks=2^8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "500"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009796881099999996"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006630669601394768"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009742486392259615"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006702329573169841"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "68882686922.01393"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "551061495376.1115"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6331129312685104"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009725467921183118"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "542"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^6 NumBlocks=2^10": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "459"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010702333769063192"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009289316979889557"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010647220575212134"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009351871253339476"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "63029467198.450455"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "504235737587.60364"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5793149558681108"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010658674782853784"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "492"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^8 NumBlocks=2^10": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "500"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.000979172994"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007443039949868979"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009736743034124385"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007443203952714375"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "68923318367.14127"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "551386546937.1301"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6334863820509308"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009717381278159657"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "541"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^10 NumBlocks=2^10": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "474"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010353058628691984"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.02225997387052118"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010297924077712025"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.022347468768318675"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "65167371106.61446"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "521338968852.9157"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5989648079652065"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010282588096414032"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "522"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^6 NumBlocks=2^6": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "76"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0066473009473684225"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0010215778080601146"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.006642639580525849"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.001033363366599417"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "10102740512.482763"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "80821924099.8621"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.11038833601926096"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.006642121520223497"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "79"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^8 NumBlocks=2^6": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "216"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0023017428981481495"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0022391000845061416"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002297048738709203"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002236143378518806"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "29215254717.542892"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "233722037740.34314"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.3192226258472781"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002296809949372944"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "228"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^10 NumBlocks=2^6": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "418"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.001179723488038277"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0038851033159110274"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001175016037870252"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0039137311385035334"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "57113147256.81244"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "456905178054.4995"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6240509971242618"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011733407974243164"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "448"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^6 NumBlocks=2^8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "224"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.002222525227678572"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0013751517218031045"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002217807294002601"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0013709949704347455"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "30259105099.65222"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "242072840797.21777"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.33062833369375244"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002216307048556171"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "237"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^8 NumBlocks=2^8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "434"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011343795576036872"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0063161302839560935"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011296571429973376"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006343846096918854"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59406399911.69264"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "475251199293.54114"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6491083906434948"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011274932015425106"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "469"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^10 NumBlocks=2^8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "437"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011263629336384434"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002337926005235178"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011216670730294026"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002335621873443411"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59829574758.535194"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "478636598068.28156"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6537322416797989"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011199346014793882"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "470"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^6 NumBlocks=2^10": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "439"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011228420820045561"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0030427104293198376"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011181568284784197"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0030409709404223893"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "60017398535.51786"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "480139188284.1429"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6557845119702563"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011164653372257314"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "470"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^8 NumBlocks=2^10": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "440"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00112084285909091"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0025157975228794673"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011161849425597621"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0025072621927497768"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "60123427078.40005"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "480987416627.2004"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6569430406293711"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011137209100238348"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "472"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^10 NumBlocks=2^10": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "464"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010599195581896552"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0019836197107494535"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010552384144273295"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0019838471234599063"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "63595925889.809"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "508767407118.472"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.69488555386592"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00105384634014122"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "499"
}
}
},
"is_skipped": false
}
}
},
{
"index": 3,
"name": "copy_type_sweep",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"T": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "U8",
"description": "uint8_t",
"is_active": true
},
{
"input_string": "U16",
"description": "uint16_t",
"is_active": true
},
{
"input_string": "U32",
"description": "uint32_t",
"is_active": true
},
{
"input_string": "U64",
"description": "uint64_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
},
"states": {
"Device=0 T=U8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U8"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "217"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0022855767235023037"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00300372701685277"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00228007503588628"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0030435512411696388"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "117730974540.34332"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "235461949080.68665"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.27052154076365653"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0022790989087975544"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "230"
}
}
},
"is_skipped": false
},
"Device=0 T=U16": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U16"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "342"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0014443213274853803"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005313412134692269"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014388333057102406"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0053179597407149795"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "93282333309.4497"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "373129333237.7988"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.4286871935176917"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001437488301595052"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "360"
}
}
},
"is_skipped": false
},
"Device=0 T=U32": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "456"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010777073771929832"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011572026564316875"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010722620346044238"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011623749514354471"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "62586253951.21598"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "500690031609.72784"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5752413046986763"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001070086296237245"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "490"
}
}
},
"is_skipped": false
},
"Device=0 T=U64": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "514"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009532965797665363"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0051196185177249205"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009478394400748767"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005131328754237089"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "35400966219.92148"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "566415459518.7437"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6507530555132625"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009458808417792793"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "555"
}
}
},
"is_skipped": false
},
"Device=0 T=F32": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "456"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010760199342105259"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.010177745756624636"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010705234403150128"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.010199116668820488"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "62687897782.2779"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "501503182258.2232"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5761755310871131"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010701700846354166"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "489"
}
}
},
"is_skipped": false
},
"Device=0 T=F64": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "514"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009537802023346293"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00826653498170841"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009483462104769544"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008316637345502817"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "35382048907.14371"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "566112782514.2993"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6504053107930828"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009457701526988636"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "550"
}
}
},
"is_skipped": false
},
"Device=1 T=U8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U8"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "184"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0027052529021739146"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003601477829562271"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0027005073000555477"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0036073295118906625"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "99401862751.66835"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "198803725503.3367"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.2715304380235696"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0027012594746802137"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "193"
}
}
},
"is_skipped": false
},
"Device=1 T=U16": {
"device": 1,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U16"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "325"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0015229335907692304"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00461287264596262"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001518287454751822"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004612569935175065"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "88400735697.27223"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "353602942789.0889"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.48295856477967786"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001517369088409953"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "346"
}
}
},
"is_skipped": false
},
"Device=1 T=U32": {
"device": 1,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "435"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011324326574712646"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006407601007533829"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011277568740406254"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006437993538809961"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59506499623.0584"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "476051996984.4672"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6502021374897116"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011264256719333023"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "469"
}
}
},
"is_skipped": false
},
"Device=1 T=U64": {
"device": 1,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "468"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010516201538461542"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002564258397948022"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010469369577546404"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0025731489764373736"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32050097908.439487"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512801566535.0318"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.700395496250863"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010448107472453933"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "502"
}
}
},
"is_skipped": false
},
"Device=1 T=F32": {
"device": 1,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "435"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.001132957098850574"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006385301490538342"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011282652709675928"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006406051844181131"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59479685962.90115"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "475837487703.2092"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6499091560631682"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011265583781452921"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "462"
}
}
},
"is_skipped": false
},
"Device=1 T=F64": {
"device": 1,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "468"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010518258760683764"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002638851741610878"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010471444782028856"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0026422486584488855"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32043746300.974895"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512699940815.5983"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.7002566936401856"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010447956953898514"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "505"
}
}
},
"is_skipped": false
}
}
},
{
"index": 4,
"name": "copy_type_conversion_sweep",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"In": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
},
"Out": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
},
"states": {
"Device=0 In=I8 Out=I8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I8 Out=I16": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "775"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.000624855941935483"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0026998034506879763"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006194506773641069"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0027411303205337333"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "108336089461.65392"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "325008268384.96173"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.37340104364081084"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006171294842507731"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "849"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=I32": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "660"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007373176257575753"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0039374442120074255"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007318062056194664"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003968882228293087"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "91703054011.67381"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "458515270058.369"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5267868451957365"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007297926682692308"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "715"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=F32": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "655"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007424307770992365"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004224076490124651"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007369820818646267"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004224007291086274"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "91059017106.91382"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "455295085534.5691"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5230871846674737"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007352958009195734"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "706"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=I64": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "407"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0012081452088452092"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009488803674295956"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0012026181030624918"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009557481496348873"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "55802306508.69623"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "502220758578.26605"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5769999524106917"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0012018776918068911"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "429"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=F64": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "415"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011839576867469879"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007849889234045669"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011785369482385114"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007894267956792296"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "56942520215.682335"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512482681941.14105"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5887898459801713"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011769149367873732"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "444"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=I8": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I16 Out=I16": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I16 Out=I32": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1105"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00043164991312217215"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.010807224020750296"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004261522234295282"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.010967121645138193"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78738136645.08269"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "472428819870.49615"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.542772081652684"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004235287455769328"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1232"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=F32": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1103"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004325297388939259"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008132056614414322"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004270972975631891"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008246119473642231"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78563906143.74146"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "471383436862.44867"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5415710441893942"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004253676273859287"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1241"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=I64": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "733"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006613082878581173"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007945982231516395"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006558368572759433"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008081739278450547"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "51162772612.94874"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "511627726129.48737"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5878075897627383"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006543398455948913"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "794"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=F64": {
"device": 0,
"type_config_index": 11,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "734"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006602106062670296"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007402870806519008"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006547767629577938"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007440276584065623"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "51245605980.92404"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512456059809.2404"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.588759259891131"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006532440560455698"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "814"
}
}
},
"is_skipped": false
},
"Device=0 In=I32 Out=I8": {
"device": 0,
"type_config_index": 12,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I32 Out=I16": {
"device": 0,
"type_config_index": 13,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I32 Out=I32": {
"device": 0,
"type_config_index": 14,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I32 Out=F32": {
"device": 0,
"type_config_index": 15,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1735"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002670209631123916"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013093461911533781"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002615653075986357"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013401071612529727"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "64141594900.43743"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "513132759203.49945"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5895367178349029"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00025968648747412565"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2023"
}
}
},
"is_skipped": false
},
"Device=0 In=I32 Out=I64": {
"device": 0,
"type_config_index": 16,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1233"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00038442103811841063"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00844365555913354"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.000379033355382238"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008658982321334757"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44263165132.474785"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531157981589.6974"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6102458428190457"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037769805444141474"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1381"
}
}
},
"is_skipped": false
},
"Device=0 In=I32 Out=F64": {
"device": 0,
"type_config_index": 17,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1233"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003844277526358471"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009146640310310188"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037900421011283686"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009309598787035932"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44266568951.84647"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531198827422.15765"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6102927704758245"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003778626589998872"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1385"
}
}
},
"is_skipped": false
},
"Device=0 In=F32 Out=I8": {
"device": 0,
"type_config_index": 18,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F32 Out=I16": {
"device": 0,
"type_config_index": 19,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F32 Out=I32": {
"device": 0,
"type_config_index": 20,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1726"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00026862367844727737"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013398984730737511"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002631768242198105"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013647890011152018"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "63748835216.53615"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "509990681732.2892"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5859267942696337"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002611921188044063"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1917"
}
}
},
"is_skipped": false
},
"Device=0 In=F32 Out=F32": {
"device": 0,
"type_config_index": 21,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=F32 Out=I64": {
"device": 0,
"type_config_index": 22,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1235"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003840306607287451"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009229816202298303"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037859300990336473"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009349966037483663"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44314648081.54371"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531775776978.5245"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6109556261242239"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037728447759194497"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1385"
}
}
},
"is_skipped": false
},
"Device=0 In=F32 Out=F64": {
"device": 0,
"type_config_index": 23,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1233"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003844510843471213"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009332888492996015"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037903595060134466"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009455810939030309"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44262862067.25975"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531154344807.117"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6102416645302355"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037756963876577526"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1352"
}
}
},
"is_skipped": false
},
"Device=0 In=I64 Out=I8": {
"device": 0,
"type_config_index": 24,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=I16": {
"device": 0,
"type_config_index": 25,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=I32": {
"device": 0,
"type_config_index": 26,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=F32": {
"device": 0,
"type_config_index": 27,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=I64": {
"device": 0,
"type_config_index": 28,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I64 Out=F64": {
"device": 0,
"type_config_index": 29,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1863"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00024704845947396656"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008865365856503242"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00024161205912269328"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009044628498958832"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "34719326636.507706"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "555509226184.1233"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.638222916112274"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00023959052166815616"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2168"
}
}
},
"is_skipped": false
},
"Device=0 In=F64 Out=I8": {
"device": 0,
"type_config_index": 30,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=I16": {
"device": 0,
"type_config_index": 31,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=I32": {
"device": 0,
"type_config_index": 32,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=F32": {
"device": 0,
"type_config_index": 33,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=I64": {
"device": 0,
"type_config_index": 34,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1863"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002471723081052067"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008513791485233733"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00024173997316990882"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008792904583053216"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "34700955286.79488"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "555215284588.7181"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6378852074778472"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002398694754203643"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2141"
}
}
},
"is_skipped": false
},
"Device=0 In=F64 Out=F64": {
"device": 0,
"type_config_index": 35,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I8 Out=I8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I8 Out=I16": {
"device": 1,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "704"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006917278508522718"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.031528564248938934"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006868111818859521"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.03156956554686085"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "97710791218.80649"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "293132373656.4195"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.4003665505578282"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.000659236081199501"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "789"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=I32": {
"device": 1,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "568"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008635000933098584"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007603813185561577"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008587616908718169"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0076405577469716105"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78146084895.6489"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "390730424478.2445"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5336680841322177"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008576233512476871"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "608"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=F32": {
"device": 1,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "568"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008623903292253519"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007683250202065139"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008576831001211219"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0077139887360741476"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78244358540.49461"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "391221792702.473"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5343392055049074"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008563666250191483"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "612"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=I64": {
"device": 1,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "339"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0014575299587020652"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005325090452654586"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014528400236878067"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005339082380112657"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "46191502784.76956"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "415723525062.9261"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.56780420271925"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00145044431581602"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "364"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=F64": {
"device": 1,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "339"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0014595411091445434"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00536065113916752"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014548183609250722"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005369023538480178"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "46128689190.67507"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "415158202716.0756"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.567032073202682"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00145255855984158"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "360"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=I8": {
"device": 1,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I16 Out=I16": {
"device": 1,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I16 Out=I32": {
"device": 1,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1042"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00046152892994241876"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007446740614945881"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00045683037259413987"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007515900606668647"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "73450527839.16064"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "440703167034.96387"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6019219392413733"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004549347768605374"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1156"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=F32": {
"device": 1,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1046"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004599197934990448"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007608321516935087"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00045521636728916755"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007662230748478094"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "73710952441.8422"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "442265714651.0532"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.604056100648838"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004532274742649026"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1168"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=I64": {
"device": 1,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "648"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007539361157407405"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005745552244274178"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007492513590388824"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005767470540109363"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44783945461.29704"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "447839454612.97046"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6116688355181524"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007466272232380319"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "705"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=F64": {
"device": 1,
"type_config_index": 11,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "650"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007517909569230775"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0052243182117119895"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007470858345581929"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005225121011834867"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44913757493.26477"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "449137574932.64764"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6134418363918374"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007439345558090966"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "707"
}
}
},
"is_skipped": false
},
"Device=1 In=I32 Out=I8": {
"device": 1,
"type_config_index": 12,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I32 Out=I16": {
"device": 1,
"type_config_index": 13,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I32 Out=I32": {
"device": 1,
"type_config_index": 14,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I32 Out=F32": {
"device": 1,
"type_config_index": 15,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1687"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002777349045643155"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005691592315009916"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.000273079350458212"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005675718906016491"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "61437146279.45599"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "491497170235.64795"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6712974899416083"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002715061958589702"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1930"
}
}
},
"is_skipped": false
},
"Device=1 In=I32 Out=I64": {
"device": 1,
"type_config_index": 16,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1133"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004230005507502205"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004752057993974487"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041831812217818477"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004789005696384007"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40106357125.149025"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "481276285501.78827"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6573375840004757"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041601362464715726"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1251"
}
}
},
"is_skipped": false
},
"Device=1 In=I32 Out=F64": {
"device": 1,
"type_config_index": 17,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1132"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004233390768551238"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004579872590098746"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041865752666346193"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004575612011474384"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40073843013.66299"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "480886116163.9559"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6568046822606478"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004163219633556548"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1260"
}
}
},
"is_skipped": false
},
"Device=1 In=F32 Out=I8": {
"device": 1,
"type_config_index": 18,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F32 Out=I16": {
"device": 1,
"type_config_index": 19,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F32 Out=I32": {
"device": 1,
"type_config_index": 20,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1665"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00028157421321321324"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.01259188984622349"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002768973456309726"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0128083650869227"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "60590021048.303505"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "484720168386.42804"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6620413139019177"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002751490314863719"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1917"
}
}
},
"is_skipped": false
},
"Device=1 In=F32 Out=F32": {
"device": 1,
"type_config_index": 21,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=F32 Out=I64": {
"device": 1,
"type_config_index": 22,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1133"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004230867334510152"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004737837167796919"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004184002545904713"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0047584325554732645"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40098484204.84705"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "481181810458.1647"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6572085479378342"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004160796998517786"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1265"
}
}
},
"is_skipped": false
},
"Device=1 In=F32 Out=F64": {
"device": 1,
"type_config_index": 23,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1132"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004234168127208481"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004661976745113187"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041872537112383403"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0046895661377312735"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40067350003.108116"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "480808200037.2974"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6566982627257668"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041628507170249205"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1259"
}
}
},
"is_skipped": false
},
"Device=1 In=I64 Out=I8": {
"device": 1,
"type_config_index": 24,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=I16": {
"device": 1,
"type_config_index": 25,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=I32": {
"device": 1,
"type_config_index": 26,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=F32": {
"device": 1,
"type_config_index": 27,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=I64": {
"device": 1,
"type_config_index": 28,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I64 Out=F64": {
"device": 1,
"type_config_index": 29,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1753"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002666010844266969"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004049936615976281"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00026192253768546044"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00396300201275967"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32027056831.87056"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512432909309.92896"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6998919762209476"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00026010225147830515"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1995"
}
}
},
"is_skipped": false
},
"Device=1 In=F64 Out=I8": {
"device": 1,
"type_config_index": 30,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=I16": {
"device": 1,
"type_config_index": 31,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=I32": {
"device": 1,
"type_config_index": 32,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=F32": {
"device": 1,
"type_config_index": 33,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=I64": {
"device": 1,
"type_config_index": 34,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1753"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00026658457387335985"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004159876144452023"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002619018185261111"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004108154173032601"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32029590505.3583"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512473448085.7328"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6999473449597531"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002600505164606654"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2010"
}
}
},
"is_skipped": false
},
"Device=1 In=F64 Out=F64": {
"device": 1,
"type_config_index": 35,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
}
}
}
]
}