nvbench/scripts/test_ref.json

{
  "devices": [
    {
      "id": 0,
      "name": "NVIDIA Quadro GV100",
      "sm_version": 700,
      "ptx_version": 700,
      "sm_default_clock_rate": 1627000000,
      "number_of_sms": 80,
      "max_blocks_per_sm": 32,
      "max_threads_per_sm": 2048,
      "max_threads_per_block": 1024,
      "registers_per_sm": 65536,
      "registers_per_block": 65536,
      "global_memory_size": 34078982144,
      "global_memory_bus_peak_clock_rate": 850000000,
      "global_memory_bus_width": 4096,
      "global_memory_bus_bandwidth": 870400000000,
      "l2_cache_size": 6291456,
      "shared_memory_per_sm": 98304,
      "shared_memory_per_block": 49152,
      "ecc_state": false
    },
    {
      "id": 1,
      "name": "NVIDIA Quadro GP100",
      "sm_version": 600,
      "ptx_version": 600,
      "sm_default_clock_rate": 1442500000,
      "number_of_sms": 56,
      "max_blocks_per_sm": 32,
      "max_threads_per_sm": 2048,
      "max_threads_per_block": 1024,
      "registers_per_sm": 65536,
      "registers_per_block": 65536,
      "global_memory_size": 17069309952,
      "global_memory_bus_peak_clock_rate": 715000000,
      "global_memory_bus_width": 4096,
      "global_memory_bus_bandwidth": 732160000000,
      "l2_cache_size": 4194304,
      "shared_memory_per_sm": 65536,
      "shared_memory_per_block": 49152,
      "ecc_state": false
    }
  ],
  "benchmarks": [
    {
      "index": 0,
      "name": "simple",
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 0.5,
      "devices": [
        0,
        1
      ],
      "axes": null,
      "states": {
        "Device=0": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": null,
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "486"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010094132736625523"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0005987183296179167"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010034002306039446"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0005072701393681687"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001001473929135854"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "524"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": null,
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "488"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010074898913934418"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0005542305355933818"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010027081287298028"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00035037919649082367"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010014748609703007"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "524"
              }
            }
          },
          "is_skipped": false
        }
      }
    },
    {
      "index": 1,
      "name": "single_float64_axis",
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 0.5,
      "devices": [
        0,
        1
      ],
      "axes": {
        "Duration": {
          "type": "float64",
          "flags": "",
          "values": [
            {
              "input_string": "0",
              "description": "",
              "value": 0.0
            },
            {
              "input_string": "0.0001",
              "description": "",
              "value": 0.0001
            },
            {
              "input_string": "0.0002",
              "description": "",
              "value": 0.0002
            },
            {
              "input_string": "0.0003",
              "description": "",
              "value": 0.00030000000000000003
            },
            {
              "input_string": "0.0004",
              "description": "",
              "value": 0.0004
            },
            {
              "input_string": "0.0005",
              "description": "",
              "value": 0.0005
            },
            {
              "input_string": "0.0006",
              "description": "",
              "value": 0.0006000000000000001
            },
            {
              "input_string": "0.0007",
              "description": "",
              "value": 0.0007000000000000001
            },
            {
              "input_string": "0.0008",
              "description": "",
              "value": 0.0008000000000000001
            },
            {
              "input_string": "0.0009",
              "description": "",
              "value": 0.0009000000000000002
            },
            {
              "input_string": "0.001",
              "description": "",
              "value": 0.0010000000000000002
            }
          ]
        }
      },
      "states": {
        "Device=0 Duration=0": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "14061"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "9.102689638005845e-06"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.033946388108068055"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "3.7547417902904438e-06"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.12549022159970946"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "1.630773172830879e-06"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "306655"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0001": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "3835"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00010860168552803123"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004007949999262656"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00010303751935470811"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004789691009751296"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00010137620362095862"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "5088"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0002": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0002"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2174"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00020898149126034966"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002070700973146156"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00020338884861017417"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002418204625044133"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0002017283984223771"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2583"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0003": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.00030000000000000003"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1520"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00030825112500000015"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0014009307905580174"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00030272901975793895"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0016163896900565434"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0003010571695496376"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1742"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0004": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0004"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1166"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0004085718481989706"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0010690404823574895"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00040307120334734023"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0012226190019077351"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004014095938278854"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1304"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0005": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0005"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "945"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0005089798201058188"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0008530028319072816"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0005034217145707861"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0009752402596440034"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0005017619516657686"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1044"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0006": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0006000000000000001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "796"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0006082355979899511"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0007134353357638104"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006027260286424639"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0008279817736951732"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006010891975612815"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "872"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0007": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0007000000000000001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "685"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0007086865854014601"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0006295331091145095"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007030571342384726"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0007151653876403053"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007014426981064088"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "748"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0008": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0008000000000000001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "602"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0008090872425249167"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0005562631850494214"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0008034305715085621"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0006219681072125149"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0008017951428707951"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "654"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.0009": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0009000000000000002"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "538"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0009084568382899636"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0005031047519089767"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009027937730448745"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0005501738587938111"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009011217884181701"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "582"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 Duration=0.001": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0010000000000000002"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "487"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010086481827515403"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0004266615566594544"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010031193825253714"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0004975122529595318"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001001475909284053"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "524"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "15089"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "8.108349592418312e-06"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.05461449121054022"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "3.271210544150035e-06"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.059765735669007766"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "1.3421442998656208e-06"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "372558"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0001": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "3944"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00010710262145030443"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004128650771669589"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00010247565930403145"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0030818570098060543"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00010137613820964433"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "5117"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0002": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0002"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2193"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00020765215686274505"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0021648763590408093"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00020284258628946086"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0015447061481155045"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0002017285137353667"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2584"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0003": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.00030000000000000003"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1537"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0003068213201040992"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0013512096196898148"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00030219575751114794"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0010543163243715088"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0003010567871656286"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1736"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0004": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0004"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1176"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00040721289880952437"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0010016437258221326"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004025331704186726"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0007456691947680211"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004014084236753499"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1304"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0005": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0005"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "951"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0005075412103049417"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.000846863074833117"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0005028813449366248"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0006292766848433991"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0005017613753177333"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1045"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0006": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0006000000000000001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "800"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0006068351487499997"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0006561812659454387"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006021752006560568"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.000511717182892197"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006010895299747637"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "873"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0007": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0007000000000000001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "690"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0007071279246376804"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0005386426703062701"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007025530446266783"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00042821786377290075"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007014415557371741"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "748"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0008": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0008000000000000001"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "605"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0008076996363636364"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0006014433173443102"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0008029008409208492"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00036509958633429017"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0008017936496559632"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "654"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.0009": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0009000000000000002"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "540"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0009070510574074071"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00046472458647248545"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009022252441556363"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00033895812399517745"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009011227322607926"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "582"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 Duration=0.001": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "Duration": {
              "type": "float64",
              "value": "0.0010000000000000002"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "488"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010073550901639342"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0004238073408932392"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010025966528986322"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0003136332645329908"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001001473929135854"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "524"
              }
            }
          },
          "is_skipped": false
        }
      }
    },
    {
      "index": 2,
      "name": "copy_sweep_grid_shape",
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 0.5,
      "devices": [
        0,
        1
      ],
      "axes": {
        "BlockSize": {
          "type": "int64",
          "flags": "pow2",
          "values": [
            {
              "input_string": "6",
              "description": "2^6 = 64",
              "value": 64
            },
            {
              "input_string": "8",
              "description": "2^8 = 256",
              "value": 256
            },
            {
              "input_string": "10",
              "description": "2^10 = 1024",
              "value": 1024
            }
          ]
        },
        "NumBlocks": {
          "type": "int64",
          "flags": "pow2",
          "values": [
            {
              "input_string": "6",
              "description": "2^6 = 64",
              "value": 64
            },
            {
              "input_string": "8",
              "description": "2^8 = 256",
              "value": 256
            },
            {
              "input_string": "10",
              "description": "2^10 = 1024",
              "value": 1024
            }
          ]
        }
      },
      "states": {
        "Device=0 BlockSize=2^6 NumBlocks=2^6": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "64"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "71"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.007065658352112677"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.05358128799632556"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.007059958081849862"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.053589324741995806"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "9505561254.326319"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "76044490034.61055"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.08736729094049925"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.006475561071325232"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "81"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^8 NumBlocks=2^6": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "256"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "229"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0021687765283842793"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.006699637202043051"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0021633964730141996"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00669331351204079"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "31020141170.19388"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "248161129361.55103"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.28511159163781136"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.002160161503025743"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "244"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^10 NumBlocks=2^6": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "1024"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "448"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010963011227678571"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.013516109455086892"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001090899714667882"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.013565950821979889"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "61516987398.26961"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "492135899186.15686"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5654134871164486"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010871857387360318"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "481"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^6 NumBlocks=2^8": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "64"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "256"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "229"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.002169116519650655"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.003880325099879575"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0021636720515755057"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0038733421374846436"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "31016190254.495274"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "248129522035.9622"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.2850752780744051"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0021606314702289093"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "243"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^8 NumBlocks=2^8": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "256"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "256"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "456"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010761263311403508"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.011961974879208899"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001070721754902288"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.012050980053815875"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "62676286993.08928"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "501410295944.71423"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5760688142747177"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010696770163143381"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "493"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^10 NumBlocks=2^8": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "1024"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "256"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "500"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.000980373466000001"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005650663121151804"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009750024316310896"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005680157515531913"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "68829432443.29456"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "550635459546.3564"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6326234599567514"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009731230225510264"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "542"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^6 NumBlocks=2^10": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "64"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "1024"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "459"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010701848496732027"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008617668166839768"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010647455503218568"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008515610201608317"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "63028076501.20161"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "504224612009.61285"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5793021737242795"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010672177234327936"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "498"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^8 NumBlocks=2^10": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "256"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "1024"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "500"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.000979696614"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0074291976714003565"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009743501433134098"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0074775515242700395"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "68875510986.00674"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "551004087888.054"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6330469759743267"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009717721991970888"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "541"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 BlockSize=2^10 NumBlocks=2^10": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "1024"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "1024"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "475"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010337088463157895"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.021637984186463816"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010282407758110449"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.02173209936637211"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "65265709723.54853"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "522125677788.38824"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.599868655547321"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010291563019039125"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "508"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^6 NumBlocks=2^6": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "64"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "76"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.006647754513157893"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0011430629751785044"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.006643085875009235"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0011389249175732911"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "10102061792.16473"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "80816494337.31784"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.11038091993186987"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.006639652300484573"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "79"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^8 NumBlocks=2^6": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "256"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "216"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.002300918597222223"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0022260554559899452"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0022963019234162794"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0022413389898784455"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "29224756255.11826"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "233798050040.94608"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.3193264450952607"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0022975726211280152"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "228"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^10 NumBlocks=2^6": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "1024"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "418"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011795720191387577"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0035334409960244696"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001174919423874485"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0035420884521558988"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "57117843688.972115"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "456942749511.7769"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6241023130350974"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011729015622820172"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "448"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^6 NumBlocks=2^8": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "64"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "256"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "224"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0022223120000000006"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0014441799301084402"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00221759328778301"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.001434325968668793"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "30262025218.83109"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "242096201750.6487"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.3306602405903747"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.002216961359573623"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "236"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^8 NumBlocks=2^8": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "256"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "256"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "435"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011336455977011492"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.006534400600481561"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001129045183631195"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0065638034102788135"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "59438599068.433075"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "475508792547.4646"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6494602170938929"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011272204485062364"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "466"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^10 NumBlocks=2^8": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "1024"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "256"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "437"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011265385652173912"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002220966435104119"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011218978122933775"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0022003475082832675"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "59817269687.70571"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "478538157501.6457"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6535977894198614"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001119863004765959"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "468"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^6 NumBlocks=2^10": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "64"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "1024"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "439"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011232369088838266"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00285184985884414"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011185731920403065"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0028360480110887457"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "59995058416.86738"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "479960467334.939"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6555404110234635"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011161975045489451"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "468"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^8 NumBlocks=2^10": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "256"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "1024"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "440"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011206702840909095"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002536479032620614"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011160453837026254"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0025536971451898373"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "60130945371.914566"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "481047562975.3165"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6570251898155001"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011138856279089096"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "470"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 BlockSize=2^10 NumBlocks=2^10": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "BlockSize": {
              "type": "int64",
              "value": "1024"
            },
            "NumBlocks": {
              "type": "int64",
              "value": "1024"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "464"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010597870474137931"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0020209648798997564"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010551077248207455"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002011195776784625"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "63603803120.10441"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "508830424960.83527"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6949716250011408"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010536742918941392"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "498"
              }
            }
          },
          "is_skipped": false
        }
      }
    },
    {
      "index": 3,
      "name": "copy_type_sweep",
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 0.5,
      "devices": [
        0,
        1
      ],
      "axes": {
        "T": {
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "U8",
              "description": "uint8_t",
              "is_active": true
            },
            {
              "input_string": "U16",
              "description": "uint16_t",
              "is_active": true
            },
            {
              "input_string": "U32",
              "description": "uint32_t",
              "is_active": true
            },
            {
              "input_string": "U64",
              "description": "uint64_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        }
      },
      "states": {
        "Device=0 T=U8": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U8"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "217"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.002284935774193548"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.003019023225421965"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0022794654072704396"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0030185067855524154"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "117762460945.3669"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "235524921890.7338"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.2705938900399056"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0022792820785984846"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "231"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 T=U16": {
          "device": 0,
          "type_config_index": 1,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U16"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "341"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0014459254017595295"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005620271181121053"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0014404413371491634"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005659383776137258"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "93178197916.5051"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "372712791666.0204"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.4282086301309977"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0014370339589576198"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "365"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 T=U32": {
          "device": 0,
          "type_config_index": 2,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U32"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "456"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010763392214912279"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009580925422442722"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010708663173412028"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009602261983780735"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "62667825958.53892"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "501342607668.31134"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5759910474130415"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010690977880559816"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "489"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 T=U64": {
          "device": 0,
          "type_config_index": 3,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "514"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0009534325642023344"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007974682202520992"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009479809484593146"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008022855237026269"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "35395681795.64538"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "566330908730.326"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6506559153611283"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009457213474094653"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "554"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 T=F32": {
          "device": 0,
          "type_config_index": 4,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "456"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010769479144736836"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.011261863999383217"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001071445541946512"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.011287071608158339"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "62633947664.836296"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "501071581318.69037"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5756796660370983"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001069358981385523"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "490"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 T=F64": {
          "device": 0,
          "type_config_index": 5,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "514"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0009534943599221791"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.006006780711077088"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009480226613900089"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00602313677626831"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "35394124388.125755"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "566305990210.0121"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6506272865464293"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0009457029259723165"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "552"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 T=U8": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U8"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "184"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00270240325"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0033226300614619185"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.002697714079981265"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0033217171224860604"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "99504783695.18842"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "199009567390.37683"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.2718115813351956"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0026982716095753207"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "195"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 T=U16": {
          "device": 1,
          "type_config_index": 1,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U16"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "325"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0015216281538461547"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0046556036312148845"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0015169812690294725"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004682337277211795"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "88476852509.76712"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "353907410039.0685"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.4833744127500389"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0015158526066057275"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "347"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 T=U32": {
          "device": 1,
          "type_config_index": 2,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U32"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "435"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011331533540229887"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.006418753103730108"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011284679349811587"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0064621372230947265"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "59469003876.588196"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "475752031012.70557"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6497924374627206"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011265910963430138"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "467"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 T=U64": {
          "device": 1,
          "type_config_index": 3,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "U64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "468"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010515641474358975"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002762541639974713"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001046885606570122"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002764528097772722"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "32051670009.99595"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "512826720159.9352"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.7004298516170443"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001044835600653889"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "503"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 T=F32": {
          "device": 1,
          "type_config_index": 4,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "435"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011328659609195397"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.006308260028809877"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011281658846756504"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.006329740046854081"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "59484925853.163795"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "475879406825.31036"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6499664101088701"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011261699270694815"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "470"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 T=F64": {
          "device": 1,
          "type_config_index": 5,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "T": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "468"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0010518281880341881"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.002638709647720786"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010471613009770718"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0026399350413532966"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "32043231514.27718"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "512691704228.4349"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.7002454439308824"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0010447449703140563"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "502"
              }
            }
          },
          "is_skipped": false
        }
      }
    },
    {
      "index": 4,
      "name": "copy_type_conversion_sweep",
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 0.5,
      "devices": [
        0,
        1
      ],
      "axes": {
        "In": {
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "I8",
              "description": "int8_t",
              "is_active": true
            },
            {
              "input_string": "I16",
              "description": "int16_t",
              "is_active": true
            },
            {
              "input_string": "I32",
              "description": "int32_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "I64",
              "description": "int64_t",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        },
        "Out": {
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "I8",
              "description": "int8_t",
              "is_active": true
            },
            {
              "input_string": "I16",
              "description": "int16_t",
              "is_active": true
            },
            {
              "input_string": "I32",
              "description": "int32_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "I64",
              "description": "int64_t",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        }
      },
      "states": {
        "Device=0 In=I8 Out=I8": {
          "device": 0,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=0 In=I8 Out=I16": {
          "device": 0,
          "type_config_index": 1,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "775"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0006248230980645156"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0027640779893251216"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006193935315070645"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0028186397219177456"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "108346084655.93024"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "325038253967.7907"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.37343549398873016"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006171660299862132"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "850"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I8 Out=I32": {
          "device": 0,
          "type_config_index": 2,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "660"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0007372658136363634"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004348049843468552"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007317814296845251"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004351029775591727"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "91706158803.36154"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "458530794016.8077"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5268046806259279"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007299521218782687"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "717"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I8 Out=F32": {
          "device": 0,
          "type_config_index": 3,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "656"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.000742387521341463"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0041525675601748364"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007369443420775064"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004193469264853706"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "91063680346.35373"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "455318401731.7686"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5231139725778592"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007352807822347689"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "714"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I8 Out=I64": {
          "device": 0,
          "type_config_index": 4,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "536870912"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "407"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0012095483882063889"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009732185124544102"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001204128551248836"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009798212399727946"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "55732308589.8092"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "501590777308.2828"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.576276168782494"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0012017273091491842"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "429"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I8 Out=F64": {
          "device": 0,
          "type_config_index": 5,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "536870912"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "415"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0011847366168674703"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.011261383409993239"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011792877487389432"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.011302242538631406"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "56906267424.351715"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "512156406819.16547"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5884149894521662"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0011767830588600852"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "440"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I16 Out=I8": {
          "device": 0,
          "type_config_index": 6,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=I16 Out=I16": {
          "device": 0,
          "type_config_index": 7,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=0 In=I16 Out=I32": {
          "device": 0,
          "type_config_index": 8,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1105"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00043142517375565617"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.01116818587784149"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00042600826737028365"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.011332580467569093"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "78764743715.25449"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "472588462291.5269"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5429554943606697"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00042359266142467694"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1238"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I16 Out=F32": {
          "device": 0,
          "type_config_index": 9,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1102"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00043289838384754937"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008465395678081931"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00042745939692221985"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008617999240612035"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "78497354933.81969"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "470984129602.9181"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5411122812533525"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00042536910129233627"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1229"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I16 Out=I64": {
          "device": 0,
          "type_config_index": 10,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "734"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0006609588569482289"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007896476276327823"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006555628124472239"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007981909890800989"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "51184160179.466095"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "511841601794.66095"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5880533108854101"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006538430490801411"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "806"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I16 Out=F64": {
          "device": 0,
          "type_config_index": 11,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "734"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0006605395899182562"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007740408518735753"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006550883051485072"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007833851008491804"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "51221234963.72489"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "512212349637.2489"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5884792619913246"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006534532250824923"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "805"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I32 Out=I8": {
          "device": 0,
          "type_config_index": 12,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=I32 Out=I16": {
          "device": 0,
          "type_config_index": 13,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=I32 Out=I32": {
          "device": 0,
          "type_config_index": 14,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=0 In=I32 Out=F32": {
          "device": 0,
          "type_config_index": 15,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1735"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00026702492853025945"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.01324576727299336"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00026161364844278195"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.013450268523907918"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "64129742847.37816"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "513037942779.02527"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5894277835236963"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00025957003988639885"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2015"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I32 Out=I64": {
          "device": 0,
          "type_config_index": 16,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1234"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0003841953128038892"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008873245446388355"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0003788044850192556"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008932234099031263"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "44289908550.44172"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "531478902605.3006"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6106145480299869"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00037766468619885956"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1381"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I32 Out=F64": {
          "device": 0,
          "type_config_index": 17,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1235"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0003840312064777327"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009389520289783196"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00037863498520754796"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009545097161422792"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "44309735379.58624"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "531716824555.03485"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6108878958582662"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0003773968978051128"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1396"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=F32 Out=I8": {
          "device": 0,
          "type_config_index": 18,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=F32 Out=I16": {
          "device": 0,
          "type_config_index": 19,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=F32 Out=I32": {
          "device": 0,
          "type_config_index": 20,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1726"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00026856249884125153"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.01342456387766187"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00026315643022814674"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.013724796519135959"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "63753775598.24316"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "510030204785.94525"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5859722021897349"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0002609094005709575"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2047"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=F32 Out=F32": {
          "device": 0,
          "type_config_index": 21,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=0 In=F32 Out=I64": {
          "device": 0,
          "type_config_index": 22,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1235"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0003840352834008098"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009209302867708775"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00037863381922486526"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009434239106344595"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "44309871829.05669"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "531718461948.68024"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6108897770550095"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00037729541193829834"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1370"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=F32 Out=F64": {
          "device": 0,
          "type_config_index": 23,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1233"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0003844534225466336"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009387088977698597"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00037907109053659035"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009568452852068391"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "44258758894.67376"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "531105106736.0851"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6101850950552448"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0003776787067281789"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1392"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=I64 Out=I8": {
          "device": 0,
          "type_config_index": 24,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=I64 Out=I16": {
          "device": 0,
          "type_config_index": 25,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=I64 Out=I32": {
          "device": 0,
          "type_config_index": 26,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=I64 Out=F32": {
          "device": 0,
          "type_config_index": 27,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=I64 Out=I64": {
          "device": 0,
          "type_config_index": 28,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=0 In=I64 Out=F64": {
          "device": 0,
          "type_config_index": 29,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "8388608"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1865"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0002468652632707771"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008794568336063534"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0002414397094508553"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009088437943671243"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "34744110730.913086"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "555905771694.6094"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6386785060829612"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00023926271107803853"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2168"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=F64 Out=I8": {
          "device": 0,
          "type_config_index": 30,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=F64 Out=I16": {
          "device": 0,
          "type_config_index": 31,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=F64 Out=I32": {
          "device": 0,
          "type_config_index": 32,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=F64 Out=F32": {
          "device": 0,
          "type_config_index": 33,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=0 In=F64 Out=I64": {
          "device": 0,
          "type_config_index": 34,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "8388608"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1861"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0002474318479312196"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009416123268532244"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00024199313163148308"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.009609928378243537"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "34664653262.864136"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "554634452205.8262"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6372178908614731"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00024011272523290366"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2177"
              }
            }
          },
          "is_skipped": false
        },
        "Device=0 In=F64 Out=F64": {
          "device": 0,
          "type_config_index": 35,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=1 In=I8 Out=I8": {
          "device": 1,
          "type_config_index": 0,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=1 In=I8 Out=I16": {
          "device": 1,
          "type_config_index": 1,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "715"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0006812909104895107"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.029682520209047932"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0006765060471488043"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.029725089166496972"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "99199207875.28265"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "297597623625.84796"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.4064652857651988"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.000659287437142213"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "797"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I8 Out=I32": {
          "device": 1,
          "type_config_index": 2,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "566"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0008641483356890464"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00815440605473416"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0008593197461783684"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008178118032486274"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "78095335640.14047"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "390476678200.70233"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5333215119655572"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0008574365556141886"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "607"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I8 Out=F32": {
          "device": 1,
          "type_config_index": 3,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "568"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0008621727816901408"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008348927642653206"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.000857566987334842"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.008407666935430734"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "78254952663.88672"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "391274763319.4336"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5344115539218662"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0008559337940091401"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "612"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I8 Out=I64": {
          "device": 1,
          "type_config_index": 4,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "536870912"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "339"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0014581254159292036"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005934832249204677"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.001453499562620765"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005963799027107206"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "46170542961.153595"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "415534886650.3824"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5675465562860337"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0014501432381838642"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "361"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I8 Out=F64": {
          "device": 1,
          "type_config_index": 5,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I8"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "536870912"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "339"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0014608549616519177"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005454444454530878"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0014561624537527042"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0054738241927221685"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "46086110671.96002"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "414774996047.64026"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.5665086812276555"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0014524769206623453"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "364"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I16 Out=I8": {
          "device": 1,
          "type_config_index": 6,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=I16 Out=I16": {
          "device": 1,
          "type_config_index": 7,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=1 In=I16 Out=I32": {
          "device": 1,
          "type_config_index": 8,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1042"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00046152389539347375"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007516961198942111"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004568425950928514"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0075614567935713"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "73448562722.52853"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "440691376335.17114"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6019058352479938"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00045486935942230756"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1156"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I16 Out=F32": {
          "device": 1,
          "type_config_index": 9,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1047"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00045967601432664773"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007580415029008197"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00045502618507418957"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.007602410404504316"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "73741760585.77625"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "442450563514.6575"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6043085712339618"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004530724069916505"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1164"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I16 Out=I64": {
          "device": 1,
          "type_config_index": 10,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "648"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0007539600570987655"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005701338376763893"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.000749293333218421"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005730659247124155"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "44781436738.365845"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "447814367383.65845"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6116345708365091"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007462590063859665"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "701"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I16 Out=F64": {
          "device": 1,
          "type_config_index": 11,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I16"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "33554432"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "268435456"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "650"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0007515365646153841"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005320261152122883"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007468673968315132"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00533121216008688"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "44926893505.259796"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "449268935052.59796"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6136212508913325"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0007440289011028757"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "702"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I32 Out=I8": {
          "device": 1,
          "type_config_index": 12,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=I32 Out=I16": {
          "device": 1,
          "type_config_index": 13,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=I32 Out=I32": {
          "device": 1,
          "type_config_index": 14,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=1 In=I32 Out=F32": {
          "device": 1,
          "type_config_index": 15,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1688"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00027765218187203764"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005690620491369388"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00027302053109941316"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.005713997774637474"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "61450382256.75059"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "491603058054.0047"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6714421138193901"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00027140032503120137"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1928"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I32 Out=I64": {
          "device": 1,
          "type_config_index": 16,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1134"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.000422905379188712"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004818481737573335"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004182333121013812"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004829428135064118"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "40114489961.844894"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "481373879542.13873"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6574708800564614"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004160488643510754"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1267"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I32 Out=F64": {
          "device": 1,
          "type_config_index": 17,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I32"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1132"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0004233320008833917"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004685003714910728"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00041865130761381596"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004676709118042214"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "40074438309.11453"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "480893259709.37445"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6568144390698405"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00041636213471617884"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1264"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=F32 Out=I8": {
          "device": 1,
          "type_config_index": 18,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=F32 Out=I16": {
          "device": 1,
          "type_config_index": 19,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=F32 Out=I32": {
          "device": 1,
          "type_config_index": 20,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1665"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0002817099831831833"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.012603278274487326"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0002770048382224978"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.0127786417628205"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "60566508901.63906"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "484532071213.1125"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6617844067049723"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0002751834324989535"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1941"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=F32 Out=F32": {
          "device": 1,
          "type_config_index": 21,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=1 In=F32 Out=I64": {
          "device": 1,
          "type_config_index": 22,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1133"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0004230943777581643"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004719817832949844"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00041844157444515244"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004751688895767683"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "40094524599.393234"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "481134295192.7188"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.657143650558237"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0004160357588015425"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1252"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=F32 Out=F64": {
          "device": 1,
          "type_config_index": 23,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F32"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "16777216"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "134217728"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1132"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00042342536395759757"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004748798224952708"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00041871643782504436"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004750041166889743"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "40068204838.45002"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "480818458061.40027"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6567122733574632"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00041632065453087554"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1252"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=I64 Out=I8": {
          "device": 1,
          "type_config_index": 24,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=I64 Out=I16": {
          "device": 1,
          "type_config_index": 25,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=I64 Out=I32": {
          "device": 1,
          "type_config_index": 26,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=I64 Out=F32": {
          "device": 1,
          "type_config_index": 27,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=I64 Out=I64": {
          "device": 1,
          "type_config_index": 28,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        "Device=1 In=I64 Out=F64": {
          "device": 1,
          "type_config_index": 29,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "I64"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "8388608"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1753"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.0002666450433542495"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004046628770937376"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00026198611749762206"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004009600477982423"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "32019284380.88381"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "512308550094.1409"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6997221237081251"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00026007244216493403"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2008"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=F64 Out=I8": {
          "device": 1,
          "type_config_index": 30,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I8"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=F64 Out=I16": {
          "device": 1,
          "type_config_index": 31,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I16"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=F64 Out=I32": {
          "device": 1,
          "type_config_index": 32,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=F64 Out=F32": {
          "device": 1,
          "type_config_index": 33,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "F32"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        "Device=1 In=F64 Out=I64": {
          "device": 1,
          "type_config_index": 34,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "I64"
            }
          },
          "summaries": {
            "Element count: Items": {
              "short_name": {
                "type": "string",
                "value": "Items"
              },
              "value": {
                "type": "int64",
                "value": "8388608"
              }
            },
            "Input Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "InSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Output Buffer Size: ": {
              "hint": {
                "type": "string",
                "value": "bytes"
              },
              "short_name": {
                "type": "string",
                "value": "OutSize"
              },
              "value": {
                "type": "int64",
                "value": "67108864"
              }
            },
            "Number of Samples (Cold)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Samples"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in cold time measurements."
              },
              "value": {
                "type": "int64",
                "value": "1753"
              }
            },
            "Average CPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "CPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time observed from host."
              },
              "value": {
                "type": "float64",
                "value": "0.00026657142213348556"
              }
            },
            "CPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold CPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.004288873685096382"
              }
            },
            "Average GPU Time (Cold)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "GPU Time"
              },
              "description": {
                "type": "string",
                "value": "Average isolated kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.0002619141041552483"
              }
            },
            "GPU Relative Standard Deviation (Cold)": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "Noise"
              },
              "description": {
                "type": "string",
                "value": "Relative standard deviation of the cold GPU execution time measurements."
              },
              "value": {
                "type": "float64",
                "value": "0.00422427515777647"
              }
            },
            "Element Throughput": {
              "hint": {
                "type": "string",
                "value": "item_rate"
              },
              "short_name": {
                "type": "string",
                "value": "Elem/s"
              },
              "description": {
                "type": "string",
                "value": "Number of input elements handled per second."
              },
              "value": {
                "type": "float64",
                "value": "32028088090.39048"
              }
            },
            "Average Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "byte_rate"
              },
              "short_name": {
                "type": "string",
                "value": "GlobalMem BW"
              },
              "description": {
                "type": "string",
                "value": "Number of bytes read/written per second to the CUDA device's global memory."
              },
              "value": {
                "type": "float64",
                "value": "512449409446.2477"
              }
            },
            "Percent Peak Global Memory Throughput": {
              "hint": {
                "type": "string",
                "value": "percentage"
              },
              "short_name": {
                "type": "string",
                "value": "BWPeak"
              },
              "description": {
                "type": "string",
                "value": "Global device memory throughput as a percentage of the device's peak bandwidth."
              },
              "value": {
                "type": "float64",
                "value": "0.6999145124648269"
              }
            },
            "Average GPU Time (Batch)": {
              "hint": {
                "type": "string",
                "value": "duration"
              },
              "short_name": {
                "type": "string",
                "value": "Batch GPU"
              },
              "description": {
                "type": "string",
                "value": "Average back-to-back kernel execution time as measured by CUDA events."
              },
              "value": {
                "type": "float64",
                "value": "0.00026007216520352087"
              }
            },
            "Number of Samples (Batch)": {
              "hint": {
                "type": "string",
                "value": "sample_size"
              },
              "short_name": {
                "type": "string",
                "value": "Batch"
              },
              "description": {
                "type": "string",
                "value": "Number of kernel executions in hot time measurements."
              },
              "value": {
                "type": "int64",
                "value": "2013"
              }
            }
          },
          "is_skipped": false
        },
        "Device=1 In=F64 Out=F64": {
          "device": 1,
          "type_config_index": 35,
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 0.5,
          "axis_values": {
            "In": {
              "type": "string",
              "value": "F64"
            },
            "Out": {
              "type": "string",
              "value": "F64"
            }
          },
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        }
      }
    }
  ]
}