nvbench/python/scripts/test_cmp.json

{
  "meta": {
    "argv": [
      "bin/nvbench.example.axes",
      "--json",
      "/home/av/code/src/nvbench/scripts/test_cmp.json"
    ],
    "version": {
      "json": {
        "major": 1,
        "minor": 0,
        "patch": 0,
        "string": "1.0.0"
      },
      "nvbench": {
        "major": 0,
        "minor": 1,
        "patch": 0,
        "string": "0.1.0",
        "git_branch": "walltime_reports",
        "git_sha": "348acbd6eb752a87e15c28fe1ad1cb827eaaadec",
        "git_version": "old-cmake-63-g348acbd",
        "git_is_dirty": false
      }
    }
  },
  "devices": [
    {
      "id": 0,
      "name": "Quadro GV100",
      "sm_version": 700,
      "ptx_version": 700,
      "sm_default_clock_rate": 1627000000,
      "number_of_sms": 80,
      "max_blocks_per_sm": 32,
      "max_threads_per_sm": 2048,
      "max_threads_per_block": 1024,
      "registers_per_sm": 65536,
      "registers_per_block": 65536,
      "global_memory_size": 34086060032,
      "global_memory_bus_peak_clock_rate": 850000000,
      "global_memory_bus_width": 4096,
      "global_memory_bus_bandwidth": 870400000000,
      "l2_cache_size": 6291456,
      "shared_memory_per_sm": 98304,
      "shared_memory_per_block": 49152,
      "ecc_state": false
    },
    {
      "id": 1,
      "name": "Quadro GP100",
      "sm_version": 600,
      "ptx_version": 600,
      "sm_default_clock_rate": 1442500000,
      "number_of_sms": 56,
      "max_blocks_per_sm": 32,
      "max_threads_per_sm": 2048,
      "max_threads_per_block": 1024,
      "registers_per_sm": 65536,
      "registers_per_block": 65536,
      "global_memory_size": 17069309952,
      "global_memory_bus_peak_clock_rate": 715000000,
      "global_memory_bus_width": 4096,
      "global_memory_bus_bandwidth": 732160000000,
      "l2_cache_size": 4194304,
      "shared_memory_per_sm": 65536,
      "shared_memory_per_block": 49152,
      "ecc_state": false
    }
  ],
  "benchmarks": [
    {
      "name": "simple",
      "index": 0,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": null,
      "states": [
        {
          "name": "Device=0",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": null,
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001009524801603207"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006144561739025865"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010034006580799991"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005237510233783218"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.514396598"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001001475909284053"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524788153"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": null,
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010077174468937882"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00494341955894122"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010027929121602258"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00033287816568109313"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5123603010000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010014740456151597"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524795703"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "single_float64_axis",
      "index": 1,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "Duration",
          "type": "float64",
          "flags": "",
          "values": [
            {
              "input_string": "0",
              "description": "",
              "value": 0.0
            },
            {
              "input_string": "0.0001",
              "description": "",
              "value": 0.0001
            },
            {
              "input_string": "0.0002",
              "description": "",
              "value": 0.0002
            },
            {
              "input_string": "0.0003",
              "description": "",
              "value": 0.00030000000000000003
            },
            {
              "input_string": "0.0004",
              "description": "",
              "value": 0.0004
            },
            {
              "input_string": "0.0005",
              "description": "",
              "value": 0.0005
            },
            {
              "input_string": "0.0006",
              "description": "",
              "value": 0.0006000000000000001
            },
            {
              "input_string": "0.0007",
              "description": "",
              "value": 0.0007000000000000001
            },
            {
              "input_string": "0.0008",
              "description": "",
              "value": 0.0008000000000000001
            },
            {
              "input_string": "0.0009",
              "description": "",
              "value": 0.0009000000000000002
            },
            {
              "input_string": "0.001",
              "description": "",
              "value": 0.0010000000000000002
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 Duration=0",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "127632"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "9.535606282123409e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4448218958078975"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "3.918024581663389e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.14066541529910018"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "11.513563003000002"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "274328"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.8226457245237315e-06"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.500101118"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "4853"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010851134411704107"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.053377272961503276"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010302993536069301"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004807683479660842"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6373502280000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "5088"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010137620362095862"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.51581551"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0002",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2459"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002088847271248475"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.027095357105136896"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020339123081777852"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002426402384835198"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5670174410000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2582"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020172880307174672"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520878249"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0003",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.00030000000000000003"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1652"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003082859001210656"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01842186373388549"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003027270989578126"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0016270299573856555"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.544737606"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1736"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00030105657621462773"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522648918"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0004",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0004"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1241"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00040859692667203864"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.013800282471048258"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004030542842665574"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0012342926945401174"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.533285391"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1304"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00040140879812416123"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5234506480000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0005",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0005"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "994"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005090076327967808"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01104211789520747"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005034694101968762"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000960945456149481"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.526845475"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1044"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005017609577982818"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5238518600000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0006",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0006000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "830"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006083229987951809"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009259805546541143"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006027641820620359"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008125705181484989"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.52231507"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "872"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006010903174724053"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5241642790000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0007",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0007000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "712"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007086338553370777"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007928264539185437"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007030805292424196"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007112507950799924"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.519468829"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "748"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007014426981064088"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.52469385"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0008",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0008000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "623"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008089985730337072"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006971030802740222"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008034196651957732"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006306208005906063"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5170688250000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "654"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008017951428707951"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5243872230000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.0009",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0009000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "554"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009083576299638984"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006199510137107782"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009027842496276245"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005444417680564487"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.514841552"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "582"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009011235712320125"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524466611"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 Duration=0.001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0010000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010087251282565122"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005573661860035435"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010031565917517711"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004852012011897464"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5138195830000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010014756763254413"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524785882"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "153013"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "7.705666139478051e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.5262458153177543"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "3.057407826310601e-06"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.045574170376734044"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "15.000211589000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "369906"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.3516989302429717e-06"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.500042922"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "4879"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010713845111703245"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.045460323768744995"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010249834163043719"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0030010311127595573"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6230727620000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "5081"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00010137619922490036"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5151083350000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0002",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2465"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020751516592292123"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.023007065837400455"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020286964052951872"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014997658908938753"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.559679316"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2588"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00020172862033755555"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522088477"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0003",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.00030000000000000003"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1655"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003068471528700908"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.015443555151131"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00030220268294890517"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010498159491600372"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.539562934"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1736"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003010567520071284"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.52264897"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0004",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0004"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1243"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00040717730973451277"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.011630958382375049"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00040252058700697966"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007711533484593173"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5294895540000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1305"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004014086726981561"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.52385337"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0005",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0005"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "995"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005075514221105535"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009291726931158024"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005029017407690461"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006019586171273846"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523705419"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1044"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005017608408726951"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5238529980000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0006",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0006000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "831"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006068636666666669"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00775159368655319"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000602217434115358"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0005211064062823375"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5198631410000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "873"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006010892503176905"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524771732"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0007",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0007000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "712"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007072028300561799"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006649464561878749"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007025522259848826"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004304629385174026"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.516796464"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "748"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007014422085195939"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524693347"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0008",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0008000000000000001"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "623"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008076071910112361"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005864235047342223"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008029232501600935"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000386286201448909"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.514722272"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "655"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008017945267771947"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5251914990000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.0009",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0009000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "555"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009069257099099103"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005199849951312571"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009022579880448067"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000339409683584611"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.513695142"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "582"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00090112220790378"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5244711550000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 Duration=0.001",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "Duration",
              "type": "float64",
              "value": "0.0010000000000000002"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "499"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010072258977955914"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004633193202486146"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010026042473340073"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003115372302150914"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.511907711"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "524"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010014748609703007"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524787242"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "copy_sweep_grid_shape",
      "index": 2,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "BlockSize",
          "type": "int64",
          "flags": "pow2",
          "values": [
            {
              "input_string": "6",
              "description": "2^6 = 64",
              "value": 64
            },
            {
              "input_string": "8",
              "description": "2^8 = 256",
              "value": 256
            },
            {
              "input_string": "10",
              "description": "2^10 = 1024",
              "value": 1024
            }
          ]
        },
        {
          "name": "NumBlocks",
          "type": "int64",
          "flags": "pow2",
          "values": [
            {
              "input_string": "6",
              "description": "2^6 = 64",
              "value": 64
            },
            {
              "input_string": "8",
              "description": "2^8 = 256",
              "value": 256
            },
            {
              "input_string": "10",
              "description": "2^10 = 1024",
              "value": 1024
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 BlockSize=2^6 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "78"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00648948455128205"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015111507522308748"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006484057010748448"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0012531664584969381"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "10349826333.845528"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "82798610670.76422"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.09512708027431552"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5077619640000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "81"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006481402361834491"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5250069540000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^8 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "672"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00217197076636905"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0074534188597851336"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002166515097376846"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00701989634431853"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "30975488738.229183"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "247803909905.83347"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.2847011832557829"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.473432187"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "673"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002163565506021122"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4606610070000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^10 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "688"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010916693808139535"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01306842599006877"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001086250233269015"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012078568140597113"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "61780298815.71512"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "494242390525.72095"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.567833628820911"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.765170478"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "689"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010836307621832676"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.748836308"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^6 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "231"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002171097186147186"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00443956157556455"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0021655962921324217"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.003655102168422409"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "30988630819.05223"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "247909046552.41785"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.28482197443981827"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.506240788"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "243"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002161031840760031"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525142297"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^8 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "736"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010725499320652177"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.011413433377036444"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001067108783223058"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010207282915832727"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "62888493708.49215"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "503107949667.9372"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5780192436442293"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.804676228"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "737"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010644761438770877"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.787106834"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^10 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1488"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009658611908602143"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007916411658808452"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009604295065966908"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005543866520742756"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "69873804937.337"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "558990439498.696"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.642222471850524"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.468436431"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1489"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009584573153443874"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4370937890000002"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^6 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "528"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010655318598484856"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010411330423168705"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010600458776408978"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009061611791593436"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63307509057.38994"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "506460072459.1195"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.581870487659834"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.573568937"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "529"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010571805049431119"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.559696812"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^8 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1032"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009617264147286825"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007568406287684157"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009562815504018629"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0049994946059467"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "70176888774.8577"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "561415110198.8616"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6450081688865598"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.013943156"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1033"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009539899551395297"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.9906108020000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 BlockSize=2^10 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "560"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010253841303571433"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.03170958999602246"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010199987426400187"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.031247624116965786"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "65793085025.09035"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "526344680200.7228"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6047158550100216"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5859060660000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "561"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010093532926046065"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5665410750000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^6 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2245"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0066631781487750605"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010029284827333777"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006658390919190473"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009997863006145854"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "10078841091.558964"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "80630728732.47171"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.11012719724168449"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "15.004037418000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2246"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006649344001406553"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "14.956094204000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^8 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "218"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002299290371559632"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0030536390935653273"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002294595665887955"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002251444609090054"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "29246487735.359"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "233971901882.872"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.3195638957097793"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5053657460000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "228"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002293837965580455"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523009414"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^10 NumBlocks=2^6",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "426"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001178968861502347"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005201397517740588"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011742734310212829"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0033150798656458847"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "57149265432.69776"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "457194123461.5821"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6244456450251067"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5103190240000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "450"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011726047092013889"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.527687784"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^6 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "226"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002219887185840708"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0026061881909203283"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002215178051881032"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015054811845863602"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "30295020277.49602"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "242360162219.96817"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.33102076352159115"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.506002865"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "237"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022142488503757913"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524792129"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^8 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "448"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011318572321428575"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007609392746712896"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011271811462938788"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0063649890038617206"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59536893622.33475"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "476295148978.678"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6505342397545317"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5155772900000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "470"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011258403372257314"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.529160021"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^10 NumBlocks=2^8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "256"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "447"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011232368366890376"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004611911863103576"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001118592285736562"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0019864118812352185"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59994034337.37313"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "479952274698.98505"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.655529221343675"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.510514425"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "470"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011164527406083776"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.52474862"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^6 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "64"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "448"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011216608169642855"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005148879095566737"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011169912165829119"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002975557170232136"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "60080028386.70366"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "480640227093.6293"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6564688416379333"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.510971747"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "470"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011154764378324467"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524288288"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^8 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "256"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "447"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011246830559284123"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004922192808378086"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011200362225240248"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002646502354772987"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59916690773.418724"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "479333526187.3498"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6546841212130542"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.511144538"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "469"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011177327820995468"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5242309390000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 BlockSize=2^10 NumBlocks=2^10",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "BlockSize",
              "type": "int64",
              "value": "1024"
            },
            {
              "name": "NumBlocks",
              "type": "int64",
              "value": "1024"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "474"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010598897257383965"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004913062706223566"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001055195342387831"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0020503329663902545"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63598521813.16255"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "508788174505.3004"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6949139184130524"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.511291385"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "498"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010538602162556477"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524838223"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "copy_type_sweep",
      "index": 3,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "T",
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "U8",
              "description": "uint8_t",
              "is_active": true
            },
            {
              "input_string": "U16",
              "description": "uint16_t",
              "is_active": true
            },
            {
              "input_string": "U32",
              "description": "uint32_t",
              "is_active": true
            },
            {
              "input_string": "U64",
              "description": "uint64_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 T=U8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U8"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2992"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022994002396390365"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.024778400174351137"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002293938610882044"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.02459574709695746"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "117019459338.00893"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "234038918676.01785"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.2688866253171161"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "6.944152369"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2993"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0022801307408338873"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "6.853598372"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=U16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "672"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014471324925595243"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006960808950083016"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001441753045966228"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005894275617037584"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "93093424269.51526"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "372373697078.06104"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.4278190453562282"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.98617708"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "673"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001438309451800399"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.9712327470000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=U32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "848"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001072196766509434"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.011171612715506738"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010668103765204251"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009987563873112983"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "62906084789.7697"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "503248678318.1576"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5781809263765597"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.9268544190000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "849"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001064370134974818"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.9073867080000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=U64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1568"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009390030325255086"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008249904375540816"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009335942644701952"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005885112868111755"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35941129114.62859"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "575058065834.0575"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6606825204894962"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.505405182"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1569"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009319373003763345"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.4727063310000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "752"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010726744441489362"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01103458165791857"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010672343821918702"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009783271621840188"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "62881092588.27738"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "503048740706.21906"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5779512186422553"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.822184149"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "753"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001063365562503555"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.803427655"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 T=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "544"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009393335257352945"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007706533640467741"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000933937587282237"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005096920917422438"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35927916872.52203"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "574846669960.3525"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6604396483919491"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5221495780000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "565"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0009305206028761061"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525756552"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U8"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2784"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0027056495269396513"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009323042699490573"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0027009093115727078"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009134230706566165"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "99387067477.54266"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "198774134955.08533"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.2714900226112944"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "7.588833747000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2785"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002695659536947251"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "7.532905504"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "330"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015226199969696965"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005622755807814305"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015179373560529775"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004685161525974869"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "88421124537.70831"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "353684498150.83325"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.4830699548607316"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.508664443"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "349"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0015155031045733347"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528926318"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "528"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011321445473484848"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007683380682909642"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011274604856064824"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006460085786164455"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59522142777.2707"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "476177142218.1656"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6503730635628354"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.607756878"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "529"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011264972348745013"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5965697010000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=U64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "U64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "478"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010525728723849374"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005381193387193611"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00104785640469156"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0028679450954259256"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32021975386.863106"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512351606189.8097"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6997809306569734"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.512086032"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "500"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010454827880859374"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5227592090000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "528"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011322722803030294"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0076927816018557355"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011275246077866272"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006425647618712464"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "59518757760.62857"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "476150062085.02856"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.650336076929945"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.608051446"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "529"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011258555051284391"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.596245555"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 T=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "T",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "478"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010523593117154819"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005286902872056256"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001047618542256216"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.002706934586546566"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32029245996.099976"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512467935937.5996"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6999398163483386"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.51215675"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "503"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0010453338319691226"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525822574"
                }
              ]
            }
          ],
          "is_skipped": false
        }
      ]
    },
    {
      "name": "copy_type_conversion_sweep",
      "index": 4,
      "min_samples": 10,
      "min_time": 0.5,
      "max_noise": 0.005,
      "skip_time": -1.0,
      "timeout": 15.0,
      "devices": [
        0,
        1
      ],
      "axes": [
        {
          "name": "In",
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "I8",
              "description": "int8_t",
              "is_active": true
            },
            {
              "input_string": "I16",
              "description": "int16_t",
              "is_active": true
            },
            {
              "input_string": "I32",
              "description": "int32_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "I64",
              "description": "int64_t",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        },
        {
          "name": "Out",
          "type": "type",
          "flags": "",
          "values": [
            {
              "input_string": "I8",
              "description": "int8_t",
              "is_active": true
            },
            {
              "input_string": "I16",
              "description": "int16_t",
              "is_active": true
            },
            {
              "input_string": "I32",
              "description": "int32_t",
              "is_active": true
            },
            {
              "input_string": "F32",
              "description": "float",
              "is_active": true
            },
            {
              "input_string": "I64",
              "description": "int64_t",
              "is_active": true
            },
            {
              "input_string": "F64",
              "description": "double",
              "is_active": true
            }
          ]
        }
      ],
      "states": [
        {
          "name": "Device=0 In=I8 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I8 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1008"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006586167946428575"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.05914716011832632"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006528769518056576"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.057982657554439924"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "102789451847.54562"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "308368355542.63684"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.35428349671718384"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6850768"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1009"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006166892571539062"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.623953807"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "684"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007371795058479537"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008847285785468822"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000731761917384746"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004879904384809398"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "91708604131.57506"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "458543020657.87524"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5268187277778897"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5183809100000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "719"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000729479623935153"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5245085300000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "680"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007416148632352943"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00847915084559806"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000736235386308502"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0043064135466205815"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "91151369858.06279"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "455756849290.3139"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5236177036883202"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5183531140000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "718"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007336860167946988"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.526798324"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "528"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0012050906723484857"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.009907122479821073"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011996847262436706"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008827766250664237"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "55938750016.53507"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "503448750148.8156"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5784107883143562"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.647279072"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "529"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011969163755838723"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.634178896"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I8 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1040"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011804124500000013"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00957111143535521"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011749697549985022"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00836195198403357"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "57115396983.206215"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "514038572848.85596"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5905774044678952"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.249563157"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1041"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0011735446663800626"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "1.2279065690000002"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 6,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I16 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 7,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I16 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 8,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1632"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00043062034803921626"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01672483523731806"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00042520409690983404"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.010836224516018789"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78913708131.82764"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "473482248790.9659"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5439823630410913"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.737060063"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1633"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004232788786191731"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.695498727"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 9,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1184"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00043284459121621524"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.015105337896417907"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004274506211733894"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008262516586090977"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78498966518.9634"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "470993799113.7804"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5411233905259426"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5372377770000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1232"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004249965618183087"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523606656"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 10,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "768"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006572663450520837"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.011014295443548292"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006518266665128367"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007223480904816997"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "51477538007.934814"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "514775380079.34814"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5914239201279275"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520738605"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "796"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006487323243414338"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.516403752"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I16 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 11,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "880"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006565674102272736"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01097889608017816"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006511251280253577"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007150441437138621"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "51533001194.03968"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "515330011940.39685"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5920611350418162"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.596114039"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "881"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006487893111724723"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.572324006"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 12,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 13,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 14,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 15,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1904"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026858391123949583"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.024532996731547897"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002631697807648852"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0133776106644627"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63750541385.25386"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "510004331082.0309"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5859424759674068"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.551536212"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1969"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00025964095085147915"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5112442500000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 16,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00038312173493975965"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0162544382499927"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00037769761349422534"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00766160749599669"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44419703489.221306"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "533036441870.65564"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6124040003109554"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.536583359"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1388"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003753291852879593"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520968135"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 17,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00038320030346385516"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.016795599989722854"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00037778021639819085"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008713885990809477"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44409990972.94271"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "532919891675.31256"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6122700961343205"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.536768873"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1377"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003755766647660222"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.517180861"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 18,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 19,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 20,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1904"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026906845745798324"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.023966161873692115"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026363512487033393"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012223432341603665"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "63638014882.31772"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "509104119058.54175"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5849082250213026"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.552411471"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1961"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002602678033419253"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5103970170000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 21,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=F32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 22,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00038308867695783106"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.016918671591625058"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003776144570480286"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008723758091138187"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44429485383.46378"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "533153824601.56537"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6125388609852543"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5366656240000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1396"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003754830346749642"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5241867800000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 23,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1328"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00038299202560240965"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0167760658438423"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0003775633729949433"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008684523141297206"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44435496660.91339"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "533225959930.9607"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6126217370530339"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5364044290000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1404"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00037541050924534816"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5270894850000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=I64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 24,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 25,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 26,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 27,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=I64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 28,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=0 In=I64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 29,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2112"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002423827249053035"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.02416424179820878"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00023696183533210337"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007872204592971034"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35400671117.538055"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "566410737880.6089"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6507476308370966"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5565369530000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2205"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00023414492098922904"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5163003700000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 30,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 31,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 32,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 33,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=0 In=F64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 34,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2112"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00024286170075757575"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.02414779678250403"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00023745110798909405"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008091753071026355"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "35327727341.60197"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "565243637465.6315"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6494067526029773"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5576062110000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2233"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00023462851593113247"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523938348"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=0 In=F64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 0,
          "type_config_index": 35,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I8 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 0,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I8 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 1,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1024"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000683441244140624"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.03316062878230732"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0006786162495845936"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.03212477441508221"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "98890741329.99298"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "296672223989.97894"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.4052013548814179"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.719493135"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1025"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000659710607877592"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.678423381"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 2,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "592"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008640211064189187"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00944028164858259"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008593624308705327"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007736902908752538"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78091456630.25882"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "390457283151.29407"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5332950217866232"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5227107280000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "614"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008578363971523819"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5267261910000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 3,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "592"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0008612816165540544"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00975919715067052"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000856635513035832"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008096027798987914"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "78340044253.09521"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "391700221265.4761"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5349926536077853"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.521050054"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "599"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000855541095510747"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5124821390000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 4,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "672"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014581301889880955"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006071682334960142"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014534626205762236"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005154971568436681"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "46171716458.311646"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "415545448124.8048"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5675609813767548"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.9926510430000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "673"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.001450536531289656"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.9791953990000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I8 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 5,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I8"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "536870912"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "352"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014604223210227273"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.006352174295896549"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014556942754848428"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005460161744719934"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "46100932819.597916"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "414908395376.3812"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5666908809227235"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520818273"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "361"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0014522860624783588"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524288878"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 6,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I16 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 7,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I16 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 8,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1104"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00046094446557971044"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012758359369013577"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004563006377252548"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007641740734756292"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "73535799045.2856"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "441214794271.71356"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6026207308125459"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.529887408"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1140"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.000454689802203262"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.518361091"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 9,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1104"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004598212318840582"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012784453789403875"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004551693620025247"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.007660841909416756"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "73718564563.25784"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "442311387379.54706"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6041184814515229"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5287007920000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1154"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004530425443599707"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5228252680000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 10,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "672"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007534447321428569"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008541470607558692"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007488120960160388"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005872606532245015"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44810216312.63993"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "448102163126.3993"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6120276485008732"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5189650410000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "701"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007457322407721113"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522771435"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I16 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 11,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I16"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "33554432"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "268435456"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "672"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007513076056547618"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.008183588591017211"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007466521440517336"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005268370256387482"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "44939845505.452805"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "449398455054.528"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6137981521177448"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.517594289"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "705"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0007440871218417554"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.524597968"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 12,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 13,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 14,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 15,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1840"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002776829885869563"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.017861418449176162"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002730688870924969"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.005670388220307151"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "61439500408.250595"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "491516003266.00476"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6713232125027382"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.546509219"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1927"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002714873839983621"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523169341"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 16,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1196"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004229804180602015"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012054556683918517"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004183483349290177"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004710258623264892"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40103460679.117165"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481241528149.406"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6572901116551109"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528636114"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1252"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004159514698357628"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520784637"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 17,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004231393305439326"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012074190328924192"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004185113171653266"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004772051537598408"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40087843056.75637"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481054116681.0764"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6570341410089002"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528483731"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1258"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00041619300539050074"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.523585816"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F32 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 18,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F32 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 19,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F32 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 20,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1808"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00028167357632743345"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.021202385815789038"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00027701768152151984"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012884867104306086"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "60563700872.27331"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "484509606978.18646"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6617537245659234"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5443872160000001"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1858"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002751795970970129"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.5112970170000001"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F32 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 21,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=F32 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 22,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00042308463263598364"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012089671437059933"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004184455500237615"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004773399327485907"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40094143668.26772"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481129724019.21265"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.657137407150367"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528290021"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1264"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004161183321023289"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.525988793"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F32 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 23,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F32"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "16777216"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "134217728"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1195"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004230846794979085"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.012109978274913669"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0004184373557567601"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004746400070144573"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "40094928832.67498"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "481139145992.0998"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6571502758851887"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.528360885"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1255"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00041616955031436757"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.522306598"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=I64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 24,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 25,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 26,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 27,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=I64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 28,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        },
        {
          "name": "Device=1 In=I64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 29,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "I64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1909"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002665689759036145"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0182363750388233"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026191822334444936"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.003986137271503454"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32027584384.489807"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512441350151.8369"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6999035049058088"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.545965233"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2012"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002600545409185512"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.52324384"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F64 Out=I8",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 30,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I8"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=I16",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 31,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I16"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=I32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 32,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=F32",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 33,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F32"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
        },
        {
          "name": "Device=1 In=F64 Out=I64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 34,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "I64"
            }
          ],
          "summaries": [
            {
              "tag": "nv/element_count/Items",
              "name": "Items",
              "description": "Number of elements: Items",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "8388608"
                }
              ]
            },
            {
              "tag": "nv/gmem/reads/InSize",
              "name": "InSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/gmem/writes/OutSize",
              "name": "OutSize",
              "hint": "bytes",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "67108864"
                }
              ]
            },
            {
              "tag": "nv/cold/sample_size",
              "name": "Samples",
              "description": "Number of isolated kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "1909"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/mean",
              "name": "CPU Time",
              "description": "Mean isolated kernel execution time (measured on host CPU)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.00026655877475117843"
                }
              ]
            },
            {
              "tag": "nv/cold/time/cpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated CPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.01817264133840171"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/mean",
              "name": "GPU Time",
              "description": "Mean isolated kernel execution time (measured with CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002619331087848795"
                }
              ]
            },
            {
              "tag": "nv/cold/time/gpu/stdev/relative",
              "name": "Noise",
              "description": "Relative standard deviation of isolated GPU times",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.004144721519339008"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/item_rate",
              "name": "Elem/s",
              "description": "Number of input elements processed per second",
              "hint": "item_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "32025764283.542324"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/bytes_per_second",
              "name": "GlobalMem BW",
              "description": "Number of bytes read/written per second to the CUDA device's global memory",
              "hint": "byte_rate",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "512412228536.6772"
                }
              ]
            },
            {
              "tag": "nv/cold/bw/global/utilization",
              "name": "BWUtil",
              "description": "Global device memory utilization as a percentage of the device's peak bandwidth",
              "hint": "percentage",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.6998637299725158"
                }
              ]
            },
            {
              "tag": "nv/cold/walltime",
              "name": "Walltime",
              "description": "Walltime used for isolated measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.546077204"
                }
              ]
            },
            {
              "tag": "nv/batch/sample_size",
              "name": "Samples",
              "description": "Number of batch kernel executions",
              "hint": "sample_size",
              "data": [
                {
                  "name": "value",
                  "type": "int64",
                  "value": "2003"
                }
              ]
            },
            {
              "tag": "nv/batch/time/gpu/mean",
              "name": "Batch GPU",
              "description": "Mean batch kernel execution time (measured by CUDA events)",
              "hint": "duration",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.0002600450215789597"
                }
              ]
            },
            {
              "tag": "nv/batch/walltime",
              "name": "Walltime",
              "description": "Walltime used for batch measurements",
              "hint": "duration",
              "hide": "Hidden by default.",
              "data": [
                {
                  "name": "value",
                  "type": "float64",
                  "value": "0.520883479"
                }
              ]
            }
          ],
          "is_skipped": false
        },
        {
          "name": "Device=1 In=F64 Out=F64",
          "min_samples": 10,
          "min_time": 0.5,
          "max_noise": 0.005,
          "skip_time": -1.0,
          "timeout": 15.0,
          "device": 1,
          "type_config_index": 35,
          "axis_values": [
            {
              "name": "In",
              "type": "string",
              "value": "F64"
            },
            {
              "name": "Out",
              "type": "string",
              "value": "F64"
            }
          ],
          "summaries": null,
          "is_skipped": true,
          "skip_reason": "Not a conversion: InputType == OutputType."
        }
      ]
    }
  ]
}