mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
- Handle multiple benchmarks. - Split output to resemble nvbench markdown output: - Headings for benchmarks / devices - Compact tables with axis values / stats - Handle `null` stdev (these are infinite, e.g. too few samples) - Format numbers, times, percentages similar to nvbench. - Add summary of total/pass/unknown/failure stats - Add new test.json files (with multiple benchmarks in each)
23046 lines
696 KiB
JSON
23046 lines
696 KiB
JSON
{
|
|
"devices": [
|
|
{
|
|
"id": 0,
|
|
"name": "NVIDIA GeForce GTX 1650",
|
|
"sm_version": 750,
|
|
"ptx_version": 750,
|
|
"sm_default_clock_rate": 1560000000,
|
|
"number_of_sms": 16,
|
|
"max_blocks_per_sm": 16,
|
|
"max_threads_per_sm": 1024,
|
|
"max_threads_per_block": 1024,
|
|
"registers_per_sm": 65536,
|
|
"registers_per_block": 65536,
|
|
"global_memory_size": 4294967296,
|
|
"global_memory_bus_peak_clock_rate": 4001000000,
|
|
"global_memory_bus_width": 128,
|
|
"global_memory_bus_bandwidth": 128032000000,
|
|
"l2_cache_size": 1048576,
|
|
"shared_memory_per_sm": 65536,
|
|
"shared_memory_per_block": 49152,
|
|
"ecc_state": false
|
|
}
|
|
],
|
|
"benchmarks": [
|
|
{
|
|
"index": 0,
|
|
"name": "cub::DeviceRadixSort::SortKeys - Overview",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0
|
|
],
|
|
"axes": {
|
|
"Key": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "bool",
|
|
"description": "",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U8",
|
|
"description": "uint8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U16",
|
|
"description": "uint16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U32",
|
|
"description": "uint32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U64",
|
|
"description": "uint64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I8",
|
|
"description": "int8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I16",
|
|
"description": "int16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I32",
|
|
"description": "int32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "I64",
|
|
"description": "int64_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F32",
|
|
"description": "float",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "F64",
|
|
"description": "double",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Input": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "Rand",
|
|
"description": "Random values uniformly distributed across `T`'s value range",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Pattern": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "Ascend",
|
|
"description": "",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"flags": "pow2",
|
|
"values": [
|
|
{
|
|
"input_string": "16",
|
|
"description": "2^16 = 65536",
|
|
"value": 65536
|
|
},
|
|
{
|
|
"input_string": "18",
|
|
"description": "2^18 = 262144",
|
|
"value": 262144
|
|
},
|
|
{
|
|
"input_string": "20",
|
|
"description": "2^20 = 1048576",
|
|
"value": 1048576
|
|
},
|
|
{
|
|
"input_string": "22",
|
|
"description": "2^22 = 4194304",
|
|
"value": 4194304
|
|
},
|
|
{
|
|
"input_string": "24",
|
|
"description": "2^24 = 16777216",
|
|
"value": 16777216
|
|
},
|
|
{
|
|
"input_string": "26",
|
|
"description": "2^26 = 67108864",
|
|
"value": 67108864
|
|
},
|
|
{
|
|
"input_string": "28",
|
|
"description": "2^28 = 268435456",
|
|
"value": 268435456
|
|
},
|
|
{
|
|
"input_string": "30",
|
|
"description": "2^30 = 1073741824",
|
|
"value": 1073741824
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 3042
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3.927120315581862e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.4351576079801729
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3.2775679267673803e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.4041140207967676
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1999531404.5142384
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3999062809.0284767
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.031234869478165433
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1691
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5.06085156712005e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.3900269997798082
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4.116893645171086e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15850062777398677
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6367519362.747736
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 12735038725.495472
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.09946762313714909
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 537
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00010505661080074492
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13838288384658645
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9.64652218021716e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08737598421104262
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10869990037.96822
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 21739980075.93644
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.16980114405723912
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 141
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0003081985815602837
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.025128010564140792
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0003021013356269674
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02018452044120483
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 13883765165.404953
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 27767530330.809906
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2168796108067507
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 34
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001104020588235294
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00868011645589697
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0010932272953145647
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.003434233699996367
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15346503029.978348
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 30693006059.956696
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.23972917754902442
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0042372500000000006
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0030855867536742037
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0042257159948349
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0019932191078502345
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15881063488.892126
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 31762126977.784252
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2480795971146608
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.019142950000000002
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.019130672454833984
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14031679055.388933
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 28063358110.777866
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2191901876935287
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "bool"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0667937
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06678323364257813
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16078014876.407965
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 32156029752.81593
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2511561933955256
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 3062
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5.317815153494455e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.3713839348328813
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4.7155929496967694e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.40429857529493773
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1389772202.54378
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2779544405.08756
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.021709763223940578
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1569
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7.891446781389421e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11657350278726154
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7.26659224292061e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11037642266139251
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3607523186.0627747
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7215046372.125549
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05635346141687664
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 505
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0001637732673267328
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04440230894077945
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00015781791602620992
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04411047078155534
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6644213954.934341
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 13288427909.868683
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1037898955719561
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 129
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005240697674418604
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01553926208912671
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005178778284741924
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.015023407302414188
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8099022142.650033
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16198044285.300066
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12651559208088656
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 31
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0019522129032258073
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.23301756357696535
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001942181168063994
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.23453440661836827
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8638337285.8691
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17276674571.7382
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13494028502044958
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0072903875
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007239378588174434
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007273227989673614
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007137742915442734
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9226833545.611362
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 18453667091.222725
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.14413324083996754
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.029422500000000004
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.029412703514099123
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9126514190.418577
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 18253028380.837154
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.14256614268961787
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12713770000000002
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12712566375732423
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8446302597.481127
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16892605194.962254
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.131940492962402
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 131072
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2506
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7.933663208300092e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.27652333688390907
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7.329776243909458e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2908651937565574
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 894106420.4307182
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3576425681.7228727
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.027933842177915464
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 524288
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1269
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00014559330181245089
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.19873696733232882
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00013759964715081717
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.09333440846518179
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1905121164.3927765
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7620484657.571106
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05952015634818722
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2097152
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 406
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00038108940886699564
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03821569814913922
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00037408323064813464
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.029659979896029746
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2803055347.290609
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 11212221389.162436
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08757358620627996
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 110
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0011471336363636359
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01637197025341408
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0011375141815705729
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014295623169967478
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3687254249.6207814
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14749016998.483126
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11519789582669275
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 28
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004043317857142858
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01100346103239151
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004035516560077667
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01078543812390853
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4157389952.4964676
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16629559809.98587
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12988596452438353
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 7
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.015595228571428572
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005255757704044775
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.015586642401559011
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00526989300271275
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4305536899.54981
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17222147598.19924
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13451439951105382
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0652593
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0652172622680664
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4116018469.107669
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16464073876.430676
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12859342880241406
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2245
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.775260579064617e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2588213270041119
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.086750657362002e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2692444298865416
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 810412027.9798346
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6483296223.838676
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05063809222568324
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1055
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00021251706161137438
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06362320460336848
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00020548479910145425
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06357499446836187
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1275734269.1347759
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10205874153.078207
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07971346345505972
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 328
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0006486628048780487
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.062073612578813026
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0006387051705543588
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.019862156160162552
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1641721483.3095798
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 13133771866.476639
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10258194722004373
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 87
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0022856022988505752
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01813131000490793
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002277155692549959
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.018170531150182676
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1841904799.800148
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14735238398.401184
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11509027741815471
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 22
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008837786363636363
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007500416221662679
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008825995575297962
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007696619972960275
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1900886518.3385963
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15207092146.70877
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11877571346779532
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 6
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.035352516666666674
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006577426327632489
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03533673604329427
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006429928650474585
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1899124580.090781
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15192996640.726248
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11866561985071113
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1430682
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.14305706024169923
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1876422285.9498873
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15011378287.599098
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11724708110159256
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 524288
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1413
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00018726730360934168
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.19158497176050596
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00017967336391272188
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1929620201640373
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 364750782.04599524
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5836012512.735924
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.045582452142713725
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2097152
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 593
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005387549747048903
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04111938777242051
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005312656186282335
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04063673179425505
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 493433022.59399897
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7894928361.5039835
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0616637118962758
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 181
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0018093104972375699
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04001254538809503
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0017961900550357542
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01423183226589721
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 583777867.5258992
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9340445880.414387
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0729539949420019
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 47
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00701123404255319
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.022882268388176443
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006997501982019303
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.023591403635686216
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 599400187.492284
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9590402999.876545
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07490629686231992
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 12
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.027476083333333335
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.009319641724100018
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02746358140309652
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.009143918373427004
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 610889590.6091974
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9774233449.747158
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07634211329782521
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 3
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11105753333333335
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11104602813720703
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 604333762.5464745
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9669340200.743591
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07552283960840721
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 4,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 3087
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5.5682150955620366e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15974697309068234
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4.976085047405057e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1566862558828566
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1317019290.7811313
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2634038581.5622625
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.020573283097680757
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1508
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.478799734748009e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.18692260194964838
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7.815668975504499e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07663177112866752
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3354082687.247878
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6708165374.495756
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.052394443377403746
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 493
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.000172661663286004
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03967760002514821
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0001667072121922918
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03756877297429747
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6289925829.9065
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 12579851659.813
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.09825552721048644
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 126
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.000547765079365079
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02267930122532969
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005414133327347893
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.020125269100866486
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7746953660.734054
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15493907321.468107
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12101589697472591
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 31
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0019378612903225812
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.011107088633221586
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0019301894287909237
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01047020541368632
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8692004913.999191
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17384009827.998383
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13577863212320657
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0075735125
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008208621847337904
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007558403968811036
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006689815940438041
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8878708293.036163
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17757416586.072327
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13869514329286683
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03057025
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03056054401397705
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8783726358.968918
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17567452717.937836
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13721142150351345
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 5,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1411317
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.14112380981445313
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7608509332.420483
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15217018664.840965
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11885324500781809
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 131072
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2444
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.023633387888718e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15957149507307994
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7.409614451997878e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11997847029431298
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 884472470.5254984
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3537889882.1019936
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.027632856489799375
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 524288
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1258
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00014721875993640703
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05030707755378517
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00014142066840858286
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04965983055115567
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1853647016.026198
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7414588064.104792
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05791199125300543
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2097152
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 395
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00038132582278481046
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.022492628088386854
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.000374839979561069
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.020997160180716862
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2797396374.922077
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 11189585499.688309
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08739678751943505
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 107
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0011724710280373834
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.019342776294093793
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0011632810418850906
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.016095597179303357
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3605580980.846342
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14422323923.385368
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11264624409042559
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 27
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00414218888888889
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.010143570260672414
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004132891301755552
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01004138981574669
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4059437999.9477477
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16237751999.79099
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12682573106560072
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 7
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.015818842857142856
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004297912986520931
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.015806600979396273
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004417201153941865
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4245622704.5571437
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16982490818.228575
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1326425488801907
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06584135
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06581772994995116
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4078467248.9331145
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16313868995.732458
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12742024646754294
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 6,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2237
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.876477425122915e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.35106187182872767
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.160484933232342e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2520628318430057
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 803089528.823398
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6424716230.587184
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05018055041385891
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1033
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0002160535333978701
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1188318605057773
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00020900739510142425
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.12132837233117388
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1254233133.1041677
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10033865064.833342
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07836997832442938
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 324
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0006508543209876547
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.017018531103233366
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0006438862221476476
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01711700991339697
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1628511317.578021
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 13028090540.624168
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10175651821907154
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 84
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002322446428571428
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0213207920280134
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002313739804994491
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02154045421700801
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1812781191.2757347
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14502249530.205877
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1132705068280264
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 21
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008975690476190476
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008053073918408916
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008964469319298155
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007960830786781505
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1871523611.987053
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14972188895.896423
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11694099050156542
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 5
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.035879860000000006
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007059995549678311
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03586985015869141
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0070565543246424054
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1870898922.1617713
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14967191377.29417
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11690195714582426
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1458295
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.14581488037109375
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1840933211.458537
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14727465691.668297
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11502956832407756
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 7,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 524288
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1442
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00018916165048543735
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08474054695372502
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00018243015832403314
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08522171353418057
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 359238848.4561566
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5747821575.298506
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04489363264885736
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2097152
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 588
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005411481292517009
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02452569809177557
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005339167867185301
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.023483747081537745
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 490982876.9594332
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7855726031.350931
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.061357520239869186
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 182
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001794480219780219
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01043393384097355
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001785578369439303
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.009622529095864946
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 587247257.217429
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9395956115.478865
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0733875602621131
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 47
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007055212765957448
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.022872858397093812
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007045798778533936
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02290392030641084
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 595291482.4616572
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9524663719.386515
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07439283709843254
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 12
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.027424558333333335
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0056934303158599825
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.027411389350891106
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005639627987530259
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 612052741.4804167
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9792843863.686666
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07648747081734775
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 3
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11087960000000001
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11086710357666014
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 605309075.7764491
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9684945212.423185
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07564472329123333
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 8,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "I64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2324
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.308123924268518e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.20123216459376678
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 7.645772117159609e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2163426509558451
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 857153456.7832047
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6857227654.265637
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05355870137360689
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1081
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00018608593894542115
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.061054319816048364
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00017904156912376654
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.058580426390462105
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1464151600.563705
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 11713212804.50964
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.09148660338438547
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 361
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0004975271468144047
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014379246069577772
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0004902403537091129
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.012272733824773483
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2138901851.0340319
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17111214808.272255
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1336479537012017
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 96
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001751761458333334
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005879019262093316
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001744055998822054
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005818847781913598
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2404913605.315918
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 19239308842.527344
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15026953294900763
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 24
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006750754166666667
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002444731344335298
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0067418733040491745
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0023841178656418723
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2488509534.8682375
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 19908076278.9459
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15549297268609333
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 6
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.026754200000000006
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0006428245182255163
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.026743643124898273
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0006056040012260031
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2509338899.2138395
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 20074711193.710716
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1567944825802199
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10674365000000001
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10673254394531251
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2515029119.3053603
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 20120232954.442883
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1571500324484729
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 9,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^16": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 65536
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 524288
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1446
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00017735048409405266
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.11011368701446163
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00017031459973024972
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1112846952116118
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 384793788.1062353
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6156700609.699765
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.048087201712851205
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^18": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 262144
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2097152
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 614
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005115635179153098
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014363302118375774
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.000504564481760081
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.013743392821332084
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 519545091.8097892
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8312721468.956627
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06492690475003614
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 187
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0017560828877005348
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007271133357282276
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0017482294819571758
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007054498367435898
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 599793111.1572946
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9596689778.516714
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07495540004465066
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 50
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0067117
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0020046252309587377
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006701972465515136
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001795830890697078
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 625831279.0125157
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10013300464.20025
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07820935753718017
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 13
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02650104615384615
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0007265940345306042
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.026490544979388894
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00072958672729313
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 633328457.8725579
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10133255325.960926
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07914627066640313
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 3
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10620103333333335
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10618826548258464
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 631980037.4835782
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10111680599.737251
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07897776024538593
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 10,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "F64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 1,
|
|
"name": "cub::DeviceRadixSort::SortKeys - Constant Values",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0
|
|
],
|
|
"axes": {
|
|
"Key": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "U8",
|
|
"description": "uint8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U16",
|
|
"description": "uint16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U32",
|
|
"description": "uint32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U64",
|
|
"description": "uint64_t",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Input": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "Const",
|
|
"description": "All values = 42",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Pattern": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "Ascend",
|
|
"description": "",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"flags": "pow2",
|
|
"values": [
|
|
{
|
|
"input_string": "20",
|
|
"description": "2^20 = 1048576",
|
|
"value": 1048576
|
|
},
|
|
{
|
|
"input_string": "22",
|
|
"description": "2^22 = 4194304",
|
|
"value": 4194304
|
|
},
|
|
{
|
|
"input_string": "24",
|
|
"description": "2^24 = 16777216",
|
|
"value": 16777216
|
|
},
|
|
{
|
|
"input_string": "26",
|
|
"description": "2^26 = 67108864",
|
|
"value": 67108864
|
|
},
|
|
{
|
|
"input_string": "28",
|
|
"description": "2^28 = 268435456",
|
|
"value": 268435456
|
|
},
|
|
{
|
|
"input_string": "30",
|
|
"description": "2^30 = 1073741824",
|
|
"value": 1073741824
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2566
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9.6390140296181e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.045598791758050095
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9.036228858044716e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.033692751648895305
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 11604132835.419285
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 23208265670.83857
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1812692582388666
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1160
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0003043432758620684
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014476575112824767
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00029814910513573545
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.011313650678305812
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 14067806771.013115
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 28135613542.02623
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.21975454216153953
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 363
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001116191460055097
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.011463805762636945
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0011063706461063093
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008802868537347153
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15164191185.878504
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 30328382371.757008
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.23688126696261097
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 100
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004274347
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0032816652112393967
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004266441283226011
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0032308789304643024
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15729470897.40716
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 31458941794.81432
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.24571155488326604
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 26
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.017081826923076922
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.010556997792740205
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.017073184013366702
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.010548301451256467
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15722635906.099308
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 31445271812.198616
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.24560478483659254
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 7
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06782757142857143
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002248092939308053
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06781701987130302
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002251004801379183
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15832925510.994875
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 31665851021.98975
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.24732762920199441
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2097152
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1374
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00025053580786026195
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06364830482823801
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0002446616114899394
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06504028362673961
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4285821521.4654465
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17143286085.861786
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13389844793381175
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 484
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.000845314876033058
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0042877181599594
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0008389603308409695
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004147125806742644
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4999406820.338754
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 19997627281.355015
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1561924150318281
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 138
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.003167936956521741
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.009915660724473297
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0031528834553732395
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001304193155702654
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5321229356.387328
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 21284917425.549313
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1662468556731857
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 37
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.012415567567567568
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0020929194181039177
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.012407164470569507
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0020887980997006564
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5408880019.216801
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 21635520076.867203
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1689852542869533
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 10
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04940072
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.000576076970718073
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04938988418579101
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005818752831350183
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5435029063.648347
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 21740116254.593388
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.16980220768708906
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 804
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00047247549751243796
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.012916467373333794
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0004640141693291389
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005379896492658131
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2259793060.8800316
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 18078344487.040253
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.14120176586353608
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 251
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.001718760956175299
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008648709803676304
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0017113251937813012
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.008654677513055743
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2450909982.0662203
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 19607279856.529762
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15314358798214323
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 68
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006638683823529413
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002767845306863416
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0066234131490483005
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0015437824831612992
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2533016682.2540236
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 20264133458.03219
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15827397414733962
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 18
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02638116111111112
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0035695573142047353
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.02635982047186958
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0034495125905173608
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2545877126.576662
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 20367017012.613297
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15907755102328555
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 5
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10501874
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0005592643361183458
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10500200805664064
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00055006156023586
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 2556479261.3794527
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 20451834091.03562
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.15974001883150793
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^20": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 254
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0017053511811023635
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005201838970683627
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0016952332624300263
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0035478470341980604
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 618543785.8250388
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 9896700573.20062
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07729864856598835
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^22": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 71
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.006611254929577467
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.003663027154345941
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0066021115477655976
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0036695333152667172
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 635297354.4379919
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10164757671.00787
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07939232122444287
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^24": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 19
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.026109178947368427
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0007116177577395647
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.026097561384502206
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0007337748011409093
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 642865275.9089972
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10285844414.543955
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08033807496988217
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^26": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 5
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10435436000000002
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0008827267294601751
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.10434051666259767
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0009079744888650902
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 643171666.6403677
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 10290746666.245884
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08037636423898623
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^28": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^30": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Const"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"index": 2,
|
|
"name": "cub::DeviceRadixSort::SortKeys - Half Word",
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"devices": [
|
|
0
|
|
],
|
|
"axes": {
|
|
"Key": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "U8",
|
|
"description": "uint8_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U16",
|
|
"description": "uint16_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U32",
|
|
"description": "uint32_t",
|
|
"is_active": true
|
|
},
|
|
{
|
|
"input_string": "U64",
|
|
"description": "uint64_t",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Input": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "Rand",
|
|
"description": "Random values uniformly distributed across `T`'s value range",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Pattern": {
|
|
"type": "type",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "Ascend",
|
|
"description": "",
|
|
"is_active": true
|
|
}
|
|
]
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"flags": "pow2",
|
|
"values": [
|
|
{
|
|
"input_string": "20",
|
|
"description": "2^20 = 1048576",
|
|
"value": 1048576
|
|
},
|
|
{
|
|
"input_string": "22",
|
|
"description": "2^22 = 4194304",
|
|
"value": 4194304
|
|
},
|
|
{
|
|
"input_string": "24",
|
|
"description": "2^24 = 16777216",
|
|
"value": 16777216
|
|
},
|
|
{
|
|
"input_string": "26",
|
|
"description": "2^26 = 67108864",
|
|
"value": 67108864
|
|
},
|
|
{
|
|
"input_string": "28",
|
|
"description": "2^28 = 268435456",
|
|
"value": 268435456
|
|
},
|
|
{
|
|
"input_string": "30",
|
|
"description": "2^30 = 1073741824",
|
|
"value": 1073741824
|
|
}
|
|
]
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"flags": "",
|
|
"values": [
|
|
{
|
|
"input_string": "Half",
|
|
"description": "",
|
|
"value": "Half"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"states": {
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 538
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.960966542750939e-05
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08397008166379398
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 8.340330078477751e-05
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.045910043356146846
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 12572356131.394053
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 25144712262.788105
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.19639396606151668
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 131
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00028253587786259543
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04123287912799956
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0002716179535589143
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.037808745584459125
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 15441924751.451488
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 30883849502.902977
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.24121976929910474
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0009550272727272727
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007063558495008569
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0009488116340203719
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007360718544290043
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17682346419.92151
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 35364692839.84302
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.27621760840917126
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.003773775
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014705369350832631
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0037657760083675386
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014874000832241253
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 17820726418.90659
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 35641452837.81318
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2783792554815451
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014729750000000002
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01471895980834961
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 18237393096.74077
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 36474786193.48154
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2848880451252932
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 0,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U8"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06453160000000001
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.06452188873291016
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16641512594.97035
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 33283025189.9407
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.2599586446352529
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2097152
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 450
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00025646377777777785
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.021618265944340534
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00025038620548115814
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.022009562632867193
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 4187834541.3837366
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 16751338165.534946
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.13083712013820722
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 116
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0007816620689655175
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.020853227399617164
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0007727365504051081
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.016658278118279097
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 5427857654.463388
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 21711430617.853554
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.16957815716269023
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 29
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002729172413793104
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007205020379178172
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.002722046876775808
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.007340614282676166
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6163455943.077721
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 24653823772.310883
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.19255985825661462
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 7
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0105532
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005970051554930724
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.010544246673583986
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.005939659771636435
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6364500573.390865
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 25458002293.56346
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1988409326852932
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0433476
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04333521652221679
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 6194395171.935518
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 24777580687.742073
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1935264675061084
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 1,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U16"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 403
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.00035955682382134016
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04131883733262655
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.000348795553294956
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.031411058442745776
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3006276857.874047
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 24050214862.992374
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.18784534228155753
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 103
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0012605281553398058
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0430687589837951
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0012512904081529784
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.03898592591253357
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3351982859.191884
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 26815862873.535072
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.20944656705772832
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 26
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004762250000000001
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004982744499450167
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.004748374150349544
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0038811646473003495
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3533254850.771389
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 28266038806.171112
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.22077323486449568
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 7
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01901851428571429
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.010872125017202998
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.019009632383074078
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01087158253950949
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3530255748.646293
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 28242045989.170345
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.22058583783093558
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 2
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07661000000000001
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.07659774398803712
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": null
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 3504482534.6543326
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 28035860277.23466
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.21897541456225522
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 2,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U32"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1048576
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 8388608
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 270
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0009441937037037035
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.08572346375827458
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0009308705164326564
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.019541093452539494
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1126446677.0506625
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 18023146832.8106
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1407706419708401
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 4194304
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 33554432
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 68
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0036803838235294115
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04345566028172002
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0036717261111035055
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.04355542870677633
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1142324855.6901317
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 18277197691.042107
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.14275491823170852
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 16777216
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 134217728
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 18
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.01408933888888889
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0017506825580184378
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.014078581280178495
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0016365268643609497
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1191683712.024376
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 19066939392.390015
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.1489232331947483
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 67108864
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": {
|
|
"Input Buffer Size: ": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "bytes"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Size"
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 536870912
|
|
}
|
|
},
|
|
"Number of Samples (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "sample_size"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Samples"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of kernel executions in cold time measurements."
|
|
},
|
|
"value": {
|
|
"type": "int64",
|
|
"value": 5
|
|
}
|
|
},
|
|
"Average CPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "CPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time observed from host."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05700572
|
|
}
|
|
},
|
|
"CPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold CPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.0126927638236131
|
|
}
|
|
},
|
|
"Average GPU Time (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "duration"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GPU Time"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Average isolated kernel execution time as measured by CUDA events."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.05699455947875977
|
|
}
|
|
},
|
|
"GPU Relative Standard Deviation (Cold)": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Noise"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Relative standard deviation of the cold GPU execution time measurements."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.012703536689392532
|
|
}
|
|
},
|
|
"Element Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "item_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "Elem/s"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of input elements handled per second."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 1177460877.2089825
|
|
}
|
|
},
|
|
"Average Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "byte_rate"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "GlobalMem BW"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Number of bytes read/written per second to the CUDA device's global memory."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 18839374035.34372
|
|
}
|
|
},
|
|
"Percent Peak Global Memory Throughput": {
|
|
"hint": {
|
|
"type": "string",
|
|
"value": "percentage"
|
|
},
|
|
"short_name": {
|
|
"type": "string",
|
|
"value": "BWPeak"
|
|
},
|
|
"description": {
|
|
"type": "string",
|
|
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
|
|
},
|
|
"value": {
|
|
"type": "float64",
|
|
"value": 0.147145823195324
|
|
}
|
|
}
|
|
},
|
|
"is_skipped": false
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 268435456
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
},
|
|
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
|
|
"device": 0,
|
|
"type_config_index": 3,
|
|
"min_samples": 10,
|
|
"min_time": 0.5,
|
|
"max_noise": 0.005,
|
|
"skip_time": -1.0,
|
|
"timeout": 0.5,
|
|
"axis_values": {
|
|
"Key": {
|
|
"type": "string",
|
|
"value": "U64"
|
|
},
|
|
"Input": {
|
|
"type": "string",
|
|
"value": "Rand"
|
|
},
|
|
"Pattern": {
|
|
"type": "string",
|
|
"value": "Ascend"
|
|
},
|
|
"Elements": {
|
|
"type": "int64",
|
|
"value": 1073741824
|
|
},
|
|
"Bits": {
|
|
"type": "string",
|
|
"value": "Half"
|
|
}
|
|
},
|
|
"summaries": null,
|
|
"is_skipped": true,
|
|
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|