nvbench/scripts/test_ref.json

{
  "meta": {
    "argv": [
      "bin/nvbench.example.axes",
      "--json",
      "/home/av/code/src/nvbench/scripts/test_ref.json"
    ],
    "version": {
      "json": {
        "major": 1,
        "minor": 0,
        "patch": 0,
        "string": "1.0.0"
      },
      "nvbench": {
        "major": 0,
        "minor": 1,
        "patch": 0,
        "string": "0.1.0",
        "git_branch": "walltime_reports",
        "git_sha": "348acbd6eb752a87e15c28fe1ad1cb827eaaadec",
        "git_version": "old-cmake-63-g348acbd",
        "git_is_dirty": false
      }
    }
  },
  "devices": [
    {
      "id": 0,
      "name": "Quadro GV100",
      "sm_version": 700,
      "ptx_version": 700,
      "sm_default_clock_rate": 1627000000,
      "number_of_sms": 80,
      "max_blocks_per_sm": 32,
      "max_threads_per_sm": 2048,
      "max_threads_per_block": 1024,
      "registers_per_sm": 65536,
      "registers_per_block": 65536,
      "global_memory_size": 34086060032,
      "global_memory_bus_peak_clock_rate": 850000000,
      "global_memory_bus_width": 4096,
      "global_memory_bus_bandwidth": 870400000000,
      "l2_cache_size": 6291456,
      "shared_memory_per_sm": 98304,
      "shared_memory_per_block": 49152,
      "ecc_state": false
    },
    {
      "id": 1,
      "name": "Quadro GP100",
      "sm_version": 600,
      "ptx_version": 600,
      "sm_default_clock_rate": 1442500000,
      "number_of_sms": 56,
      "max_blocks_per_sm": 32,
      "max_threads_per_sm": 2048,
      "max_threads_per_block": 1024,
      "registers_per_sm": 65536,
      "registers_per_block": 65536,
      "global_memory_size": 17069309952,
      "global_memory_bus_peak_clock_rate": 715000000,
      "global_memory_bus_width": 4096,
      "global_memory_bus_bandwidth": 732160000000,
      "l2_cache_size": 4194304,
      "shared_memory_per_sm": 65536,
      "shared_memory_per_block": 49152,
      "ecc_state": false
    }
  ],
  "benchmarks": [
    {
      "name": "simple",
      "index": 0,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": null,
      "states": [
        {
          "name": "Device=0",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": null,
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010094458717434867"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005997663682735138"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010034715849794225"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005782350585973689"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.51435071"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001001475909284053"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524782268"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": null,
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010075622164328662"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004836642334083953"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010027443022431728"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00034308545348455907"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.512193993"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010014738126565483"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5247834060000001"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "single_float64_axis",
      "index": 1,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "Duration",
          "type": "float64",
          "flags": "",
          "values": [
            {
              "input_string": "0",
              "description": "",
              "value": 0.0
            },
            {
              "input_string": "0.0001",
              "description": "",
              "value": 0.0001
            },
            {
              "input_string": "0.0002",
              "description": "",
              "value": 0.0002
            },
            {
              "input_string": "0.0003",
              "description": "",
              "value": 0.00030000000000000003
            },
            {
              "input_string": "0.0004",
              "description": "",
              "value": 0.0004
            },
            {
              "input_string": "0.0005",
              "description": "",
              "value": 0.0005
            },
            {
              "input_string": "0.0006",
              "description": "",
              "value": 0.0006000000000000001
            },
            {
              "input_string": "0.0007",
              "description": "",
              "value": 0.0007000000000000001
            },
            {
              "input_string": "0.0008",
              "description": "",
              "value": 0.0008000000000000001
            },
            {
              "input_string": "0.0009",
              "description": "",
              "value": 0.0009000000000000002
            },
            {
              "input_string": "0.001",
              "description": "",
              "value": 0.0010000000000000002
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 Duration=0",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "127488"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "9.540251349146535e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4435508787705211"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "3.9224058844425625e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.14064817853323436"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "11.490547931"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "274905"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.8188127571551e-06"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.500083096"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "4853"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010853461796826674"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.05359830602702947"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010302987600936478"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00484111901842999"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.637141422"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "5092"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0001013762061275653"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.51621627"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0002",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2459"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020891169174461132"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.02717422799526722"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020340182381027155"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002406936807045068"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5674029660000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2582"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020172880307174672"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520873229"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0003",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.00030000000000000003"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1652"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003081868111380144"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.018240770684480382"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00030268341853095175"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0016523707958282026"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5447823540000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1736"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00030105657621462773"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5226434630000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0004",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0004"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1241"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00040852977276389983"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.013603343023457075"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00040306361880540617"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0012210042127847492"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.53335829"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1304"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00040140879812416123"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5234471270000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0005",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0005"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "994"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005089016619718308"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010853962612041912"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000503456178265558"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009750606561696034"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5268077360000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1044"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005017619516657686"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523849472"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0006",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0006000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "830"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006082555698795184"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009191209785025295"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000602735921345563"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008234812151490051"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522369137"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "872"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006010903174724053"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524159087"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0007",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0007000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "712"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000708571620786517"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007823433090894212"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007030903266721907"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007055254847806133"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5193877680000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "748"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007014426981064088"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524686893"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0008",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0008000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "623"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008089194157303374"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006828496858360085"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008034522826177895"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000611164680542835"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.516959448"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "654"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008017951428707951"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524383518"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0009",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0009000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "554"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009082872328519855"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006126265423787953"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009027800905360124"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00054941989913754"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.514815663"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "582"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009011235712320125"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524463788"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0010000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010086229759519048"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005485055388542774"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010031437666000492"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004923631784045008"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.513707802"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001001475909284053"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5247822560000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "153037"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "7.764162771094724e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.5441551718680286"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "3.05725036652246e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0422080578285922"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "15.000158798000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "369923"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.3516386844065532e-06"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5000379110000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "4880"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010714987602459019"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.04579843914769717"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010247656405124585"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.003070733813086406"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.623587079"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "5111"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010137617021268466"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5181448860000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0002",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2466"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002074915798864561"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.023038028738255983"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020283785226716245"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015459112691612667"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5598459640000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2588"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002017284788341021"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522084334"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0003",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.00030000000000000003"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1655"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00030685509425981873"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.015537955668238047"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00030217996281079384"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010230869145796749"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.539645641"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1737"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003010563825696243"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522946315"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0004",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0004"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1243"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004072019324215605"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.011695107558689763"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004025163378863194"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007675334678184685"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.529549118"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1304"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004014085172875527"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523448241"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0005",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0005"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "995"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005075862180904529"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009382010906172408"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005028912236343076"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000614146973517185"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523755894"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1044"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005017611916494552"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523849106"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0006",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0006000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "831"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000606902394705175"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007845243707907233"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006022033425301283"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005063896891280906"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.519957368"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "872"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006010888475890554"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524160638"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0007",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0007000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "712"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007072048665730335"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0066432216884127464"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000702561125326692"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000444186835693086"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.516785433"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "747"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007014415899274179"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523988754"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0008",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0008000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "623"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008075409711075438"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005835313968640737"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008028804110677048"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00038458153244696385"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.514684533"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "654"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008017937429818903"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524384532"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0009",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0009000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "555"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009069636108108111"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00524675883682327"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009022562016237966"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00035453453918318805"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.513753633"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "583"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009011228374919596"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525365383"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0010000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010072655711422854"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004687180507983469"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010025901990328623"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003187607548154873"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.511897986"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010014750939289121"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524783733"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "copy_sweep_grid_shape",
      "index": 2,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "BlockSize",
          "type": "int64",
          "flags": "pow2",
          "values": [
            {
              "input_string": "6",
              "description": "2^6 = 64",
              "value": 64
            },
            {
              "input_string": "8",
              "description": "2^8 = 256",
              "value": 256
            },
            {
              "input_string": "10",
              "description": "2^10 = 1024",
              "value": 1024
            }
          ]
        },
        {
          "name": "NumBlocks",
          "type": "int64",
          "flags": "pow2",
          "values": [
            {
              "input_string": "6",
              "description": "2^6 = 64",
              "value": 64
            },
            {
              "input_string": "8",
              "description": "2^8 = 256",
              "value": 256
            },
            {
              "input_string": "10",
              "description": "2^10 = 1024",
              "value": 1024
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 BlockSize=2^6 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "78"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006491010679487182"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014400823428293225"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006485689823444072"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001177496193520018"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "10347220700.783287"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "82777765606.2663"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.09510313144102286"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.507885141"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "81"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006479606722608024"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524857216"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^8 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "656"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002171159740853659"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008002387413205372"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0021657661977337647"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007621859662189712"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "30986199743.177273"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "247889597945.41818"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.2847996299924382"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.437881264"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "657"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0021637841704410677"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.425925528"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^10 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "752"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010918482712765959"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012885667055169438"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010864888095158216"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.011885928967750255"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "61766732811.45538"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "494133862491.64307"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5677089412817591"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.8365534670000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "753"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010831134183156693"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.8183730920000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^6 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "231"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002170435731601731"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004358290289328953"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0021650726464919703"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.003568770929248165"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "30996125746.05075"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "247969005968.406"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.28489086163649585"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.50610783"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "243"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0021624730742026746"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5254883690000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^8 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "848"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001072975840801887"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010997227168594192"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001067600981103923"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009773228014049224"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "62859500120.174065"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "502876000961.3925"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5777527584574822"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.9275834220000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "849"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010646783151390692"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.907651745"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^10 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1456"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009655372026098907"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007865605432218092"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009601815831693899"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005554818166025829"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "69891846684.33807"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "559134773474.7046"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6423882967310485"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4363716210000002"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1457"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009584971580017669"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4059789770000002"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^6 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "976"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010651546700819676"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010462843606792348"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010597991125016906"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009173166661501059"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63322249668.22941"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "506577997345.83527"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5820059712153438"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.059860389"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "977"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010574653456130558"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.038207793"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^8 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1231"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009616476466287569"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007525336475207418"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009562607302014941"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004999666932127862"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "70178416702.1681"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "561427333617.3448"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6450222123361039"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.209430215"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1232"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009541545041486059"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.182675849"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^10 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "496"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001023795669354839"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.03114530461728092"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010184043220454653"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0306728390106973"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "65896091117.53555"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "527168728940.2844"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6056626021832312"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.518169856"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "542"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010072569899893336"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.54594076"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^6 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2244"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0066659496501782385"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012044246591117944"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0066612275798478"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012013908599240357"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "10074549052.04325"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "80596392416.346"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.11008029995676627"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "15.003487063000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2245"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006649818384514629"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "14.950437548000002"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^8 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "218"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022997498486238524"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0031075885812940247"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022950336933135985"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002319295976145478"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "29240905785.18147"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "233927246281.45175"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.319502904121301"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.505458689"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "228"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022943040278919956"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5231123440000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^10 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "426"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011787892863849767"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0053738632436882575"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011741032116289985"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.003574335492607712"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "57157550831.40471"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "457260406651.2377"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6245361760424466"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.510194696"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "448"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011726912089756558"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525375562"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^6 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "226"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002220062486725664"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0026283185437807914"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002215349671060005"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015099121235202378"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "30292673376.42893"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "242341387011.43143"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.33099511993475667"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5059807590000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "237"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002214404399887922"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524823987"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^8 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "544"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001132157450367647"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007421825838534079"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011274838826673863"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006150104794311432"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59520907599.348335"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "476167260794.7867"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6503595673005719"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.626143416"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "545"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011260322986392797"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6144447430000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^10 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "447"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011234373914988803"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0046157037787769705"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011187847153985792"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0019678845031191957"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59983715433.66298"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "479869723469.30383"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6554164710846042"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5105567950000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "471"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001116505875962049"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5258857730000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^6 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "448"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011217261607142856"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0051297852216839"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011170590700847755"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002961082814438008"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "60076378946.466095"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "480611031571.72876"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6564289657612117"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.510912252"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "471"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011151960174495754"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525268305"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^8 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "447"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011251578970917226"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005034162791339124"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011205025481964396"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0028024348992886213"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59891754916.5938"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "479134039332.7504"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6544116577425022"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.511320454"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "469"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011173903865854878"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524067611"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^10 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "474"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010600141455696206"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004975251378988354"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001055306057638257"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002145050602183929"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63591849505.90315"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "508734796047.2252"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6948410129578578"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.51139696"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001053948106173284"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5259308100000001"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "copy_type_sweep",
      "index": 3,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "T",
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "U8",
              "description": "uint8_t",
              "is_active": true
            },
            {
              "input_string": "U16",
              "description": "uint16_t",
              "is_active": true
            },
            {
              "input_string": "U32",
              "description": "uint32_t",
              "is_active": true
            },
            {
              "input_string": "U64",
              "description": "uint64_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 T=U8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U8"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "3008"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022984299517952063"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.024386082027668385"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002292998504448446"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.02420260340376021"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "117067436144.95683"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "234134872289.91367"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.2689968661419045"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "6.978260496000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "3009"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022799289537926114"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "6.889595338"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=U16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "352"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001447471568181817"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006559305677944603"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014420469982380224"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0053741666978658"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "93074447756.55377"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "372297791026.2151"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.4277318371165155"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.51661557"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "364"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014379604465358862"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5234251630000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=U32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "960"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010732170854166677"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010747135588535793"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010678389670948187"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009483040603054523"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "62845490816.44543"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "502763926531.5634"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5776239964746822"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.050184244"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "961"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010641954020828663"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.027618288"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=U64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1232"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009393459350649342"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008173389439931525"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009339579984352195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005779340051383488"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35927131687.09736"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "574834106993.5577"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6604252148363485"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.1830953560000002"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1233"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009315095380951709"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.155529067"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "496"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001073113616935484"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01045196550212673"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010676812894882687"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009124433511281603"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "62854771981.7819"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "502838175854.2552"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5777093013031425"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.542738971"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "497"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010637711835818988"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528728784"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1232"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009395226306818184"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008519805595594534"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009341417393804389"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006275440558350422"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35920065002.399605"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "574721040038.3937"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6602953125441103"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.183265443"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1233"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009317267593676144"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.155949535"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U8"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2640"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002704848576515149"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008230990844947604"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002700116645206098"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008022518759096101"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "99416244285.81326"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "198832488571.62653"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.2715697232457749"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "7.193921508000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2641"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002695621145895508"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "7.143042223"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "330"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015221530787878797"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0057375488336673195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015174821813901261"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004846658675573777"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "88447646796.77927"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "353790587187.11707"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.48321485356632027"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.508487203"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "347"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001516773410764139"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5263325360000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "704"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011323334801136371"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007608036417757464"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001127679999409751"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0063837058695073836"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59510556217.30105"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "476084449738.4084"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6502464621645656"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.810527479"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "705"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011263038268326028"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.796312915"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "478"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010527462217573217"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005281184054309557"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010480330030289645"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002728222354093458"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32016579538.070763"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512265272609.1322"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.699663014380917"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.51213991"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "500"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00104523095703125"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522627688"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "464"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011329571594827575"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00779514172858376"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011281946900075874"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006520518090102098"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59483407070.05869"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "475867256560.46954"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6499498150137532"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.534519304"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "467"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011258338421774624"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5257752330000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "478"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010524297447698746"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0053011542236172425"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010477156826142983"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0027421021970940066"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32026276361.802433"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512420421788.8389"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6998749204939343"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.511908565"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "501"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010452877204575224"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5236998340000001"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "copy_type_conversion_sweep",
      "index": 4,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "In",
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "I8",
              "description": "int8_t",
              "is_active": true
            },
            {
              "input_string": "I16",
              "description": "int16_t",
              "is_active": true
            },
            {
              "input_string": "I32",
              "description": "int32_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "I64",
              "description": "int64_t",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        },
        {
          "name": "Out",
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "I8",
              "description": "int8_t",
              "is_active": true
            },
            {
              "input_string": "I16",
              "description": "int16_t",
              "is_active": true
            },
            {
              "input_string": "I32",
              "description": "int32_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "I64",
              "description": "int64_t",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 In=I8 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I8 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "992"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006600980292338716"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.06265755233269708"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006543757735841723"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.061480066936899634"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "102554016681.31558"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "307662050043.9468"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.35347202440710795"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6754950980000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "993"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006166902596256644"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.613870906"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "684"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007370927309941522"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008729576791697422"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007317126547558279"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004720480993831976"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "91714778422.67767"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "458573892113.3883"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5268541959023303"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5183597600000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "723"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007290440898383471"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5271070320000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "680"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007416632955882347"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00842788883853582"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007362919512917023"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004273389706237406"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "91144367234.04161"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "455721836170.20807"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5235774772176104"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.518484646"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "712"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007338700883843925"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522523862"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "528"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0012047073446969693"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009645329133519535"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011993323018153505"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0085331592060071"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "55955187647.6784"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "503596688829.1056"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5785807546290276"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.647215045"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "529"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001196625677083526"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6339911500000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1200"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001180585506666666"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010080936476778664"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001175182694693406"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008979393908657816"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "57105047838.97287"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "513945430550.75586"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5904703935555559"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4422983710000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1201"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011731425545594744"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4168893610000002"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 6,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I16 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 7,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I16 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 8,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1696"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00043055614622641435"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01653471806668262"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00042517247028157185"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010615851149343741"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78919578160.31331"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "473517468961.8799"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5440228273918657"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.766234603"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1697"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004232069438320117"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.7227836270000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 9,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1184"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00043288785641891876"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01493008601829662"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004275269453740999"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008146558252326382"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78484952499.63434"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "470909714997.8061"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5410267865324059"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.537378945"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1238"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000424921975574894"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.526063107"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 10,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "768"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006571356510416664"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010841823646108464"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006517510409466911"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007001797627972981"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "51483511175.15826"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "514835111751.5826"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5914925456704763"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5206853530000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "811"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006489950196516646"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.526342823"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 11,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "768"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006567598033854167"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010681410148370487"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006514065422428152"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006845472386064585"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "51510738416.09102"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "515107384160.9102"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5918053586407517"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5204057160000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "805"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006483509893002717"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.521930972"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 12,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 13,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 14,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 15,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1904"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002683022746848742"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0242735351289231"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026296673859117433"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.013347568443463995"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63799764524.90815"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "510398116199.2652"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5863948945304058"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.550977791"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2015"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002597611117303815"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523427203"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 16,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000382968452560241"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.016207309597693925"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00037759019212281323"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0077904180984165565"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44432340537.44468"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "533188086449.3362"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6125782243213881"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5365996590000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1396"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003753899079680784"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524052128"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 17,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00038311062575301184"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.016679525157534167"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00037773971044155724"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00868318074979036"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44414753165.31679"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "532977037983.8014"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6123357513600659"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.53682523"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1367"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003756192367096059"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.513480453"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 18,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 19,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 20,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1904"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026897186554621826"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.02368215369603371"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026361388154327934"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012168663554835448"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63643143152.328896"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "509145145218.63116"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5849553598559641"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5524873680000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1991"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026033155670242655"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5183284650000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 21,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=F32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 22,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003830804947289157"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01669586098492127"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003777089391846278"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008791389028513721"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44418371554.07946"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "533020458648.95355"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6123856372345514"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.536636844"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1404"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003754535468555244"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.527143864"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 23,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003830802665662652"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01641959194869226"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00037772002358393717"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008238450770780798"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44417068072.833466"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "533004816874.0016"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6123676664453144"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.536685893"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1389"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000375325125357159"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.521334187"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 24,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 25,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 26,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 27,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 28,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 29,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2112"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002422139554924242"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.02400462331990132"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00023683451699572973"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007866889921134117"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35419701935.386604"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "566715230966.1857"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6510974620475479"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.556787641"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2225"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00023412507132198032"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5209357530000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 30,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 31,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 32,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 33,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 34,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2112"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00024282649337121185"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.024008982414037136"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002374703656091845"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008267454870626736"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35324862445.386154"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "565197799126.1785"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6493540890695985"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5575585390000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2214"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00023455057945354847"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.519302479"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 35,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I8 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I8 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "992"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006824859284274195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.032440596768964644"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006776485806030618"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.03134186379501587"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "99031955383.5376"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "297095866150.6128"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.40577997452826264"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.696101135"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "993"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006594375443362513"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6568292870000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "592"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008640237381756752"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01007729538332117"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008593635128156565"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008503073738172521"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78091358312.52779"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "390456791562.639"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5332943503641813"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522774643"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "615"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008578066445947664"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.527563349"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "656"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008614595929878056"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0098819558217369"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008567909279429336"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008239605399217536"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78325834006.11096"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "391629170030.5548"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5348956102908583"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.577643271"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "657"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008551398322462489"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.562343075"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "528"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014573860359848496"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006512604739977204"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014527478784774298"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00567750642698368"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "46194432629.517426"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "415749893665.65686"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5678402175284868"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.779591082"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "529"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014509199143357438"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.7691349590000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "352"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014603711335227268"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006314392964515164"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014557048187337147"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005429185609780399"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "46100598923.87834"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "414905390314.905"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5666867765446146"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520741476"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "363"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014527880455836777"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.527373104"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 6,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I16 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 7,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I16 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 8,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1104"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004611589565217395"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012940549047826096"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004564886665700571"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007894471093785426"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "73505509462.30429"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "441033056773.82574"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6023725097981667"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.530236417"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1149"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00045453528926308207"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5222721800000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 9,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1104"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004598074438405802"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012817089346835498"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004551599129116618"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007723313560215716"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "73720094955.97276"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "442320569735.83655"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6041310229128012"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528748192"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1166"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00045311976542399224"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5283480380000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 10,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "672"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007533759211309518"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00846442255799797"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007486925714959693"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005711239714249503"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44817370009.36792"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "448173700093.6792"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6121253552415854"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.518973692"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "701"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007453038627853066"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5224711350000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 11,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "672"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007515454092261914"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008392859436579346"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000746849381497928"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005530757922319482"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44927977221.72726"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "449279772217.2726"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6136360525257766"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.517848211"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "704"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007438007701526989"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523646363"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 12,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 13,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 14,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 15,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1840"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00027774970760869537"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01802281728547624"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00027309293929973365"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0057206437926147526"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "61434089226.254715"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "491472713810.0377"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6712640868253356"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.547010069"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1923"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002714794236300702"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522066944"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 16,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00042314036485355624"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012144571947569476"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00041846681174872806"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00470128993484091"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40092106539.79896"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481105278477.5875"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6571040188996771"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5286744840000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1263"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00041609304251410327"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525537319"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 17,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004230686694560669"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012123446476677473"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000418410443611225"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004701276954550611"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40097507737.136955"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481170092845.6435"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6571925437686346"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528583919"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1258"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004162648114566773"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523674003"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 18,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 19,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 20,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1808"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00028176399834070837"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.021188068496395096"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002770714691914288"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012675642708623109"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "60551943687.88875"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "484415549503.11"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6616252588274557"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.544610159"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1911"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00027552812178056315"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.526544561"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 21,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=F32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 22,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00042314520585774067"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0121383962955979"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004184789956862957"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004742822949323976"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40090939265.62733"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481091271187.5279"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6570848874392591"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528606066"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1257"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00041616969385503683"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523135939"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 23,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004232182125523013"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012193893187009835"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00041854015763334633"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004766137974469347"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40085080712.12928"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481020968545.5514"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6569888665668042"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528650917"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1253"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004162007763399092"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5215109210000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 24,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 25,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 26,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 27,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 28,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 29,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1909"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002666127674174964"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.018360326425103806"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026193604216705505"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004085453563964175"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32025405631.84502"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512406490109.5203"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6998558923043056"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.546196184"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1976"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002601132721070819"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5139936970000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 30,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 31,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 32,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 33,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 34,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1910"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002665624010471204"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.018356743233435932"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026189029580323475"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004090112866661142"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32030999752.287834"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512495996036.60535"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6999781414398565"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.546475312"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2007"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026003379042491905"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.521898494"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 35,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        }
      ]
    }
  ]
}