From 461f417b9dfb7b7a14fbe65cf7c9191115b3f7b0 Mon Sep 17 00:00:00 2001 From: akerr Date: Tue, 18 Sep 2018 16:58:03 -0700 Subject: [PATCH 1/4] Checkpointing CUTLASS 1.1 release. --- changelog.md => CHANGELOG.md | 18 +- CMakeLists.txt | 91 +- CUTLASS.md | 311 ++++ Doxyfile | 2 +- README.md | 79 +- clang-format.sh | 17 - cutlass/convert.h | 2 +- cutlass/coord.h | 160 +- cutlass/core_io.h | 90 +- cutlass/cutlass.h | 21 +- cutlass/fragment.h | 42 +- cutlass/fragment_load_store.h | 135 -- cutlass/fragment_multiply_add.h | 66 +- cutlass/gemm/clear_accumulators.h | 7 +- cutlass/gemm/dgemm_traits.h | 39 +- cutlass/gemm/fp16_sgemm_multiply_add.h | 83 + cutlass/gemm/fp16_sgemm_traits.h | 152 ++ cutlass/gemm/gemm.h | 305 ++-- cutlass/gemm/gemm_config.h | 145 ++ cutlass/gemm/gemm_coord.h | 203 +++ cutlass/gemm/gemm_desc.h | 205 +++ cutlass/gemm/gemm_epilogue.h | 105 +- cutlass/gemm/gemm_epilogue_traits.h | 71 +- cutlass/gemm/gemm_global_stream.h | 149 +- cutlass/gemm/gemm_global_tile.h | 433 +++-- cutlass/gemm/gemm_operand.h | 6 +- cutlass/gemm/gemm_shared_stream.h | 41 +- cutlass/gemm/gemm_shared_tile.h | 2 +- cutlass/gemm/gemm_stream_pair.h | 251 +++ cutlass/gemm/gemm_traits.h | 460 ++--- cutlass/gemm/hgemm_global_tile.h | 16 +- cutlass/gemm/hgemm_multiply_add.h | 18 +- cutlass/gemm/hgemm_swizzle.h | 2 +- cutlass/gemm/hgemm_traits.h | 125 +- cutlass/gemm/igemm_epilogue.h | 28 +- cutlass/gemm/igemm_global_tile.h | 57 +- cutlass/gemm/igemm_multiply_add.h | 16 +- cutlass/gemm/igemm_swizzle.h | 12 +- cutlass/gemm/igemm_traits.h | 149 +- cutlass/gemm/linear_scaling.h | 100 +- cutlass/gemm/linear_scaling_device_ptr.h | 149 ++ cutlass/gemm/scalar_or_pointer.h | 129 ++ cutlass/gemm/sgemm_traits.h | 139 +- cutlass/gemm/thread_multiply_add.h | 30 +- cutlass/gemm/threadblock_swizzle.h | 387 +++++ cutlass/gemm/wmma_gemm_epilogue_traits.h | 25 +- cutlass/gemm/wmma_gemm_global_tile.h | 132 +- cutlass/gemm/wmma_gemm_multiply_add.h | 257 ++- cutlass/gemm/wmma_gemm_shared_tile.h | 9 +- 
cutlass/gemm/wmma_gemm_traits.h | 640 ++++++- cutlass/iterator_access.h | 255 +-- cutlass/kernel_launch.h | 67 + cutlass/load_store.h | 261 ++- cutlass/matrix_traits.h | 326 +++- cutlass/predicate_vector.h | 9 +- cutlass/reshape_tile.h | 2 +- cutlass/shape.h | 75 +- cutlass/tensor_ref.h | 598 ++++++- cutlass/tensor_ref_collection.h | 420 +++++ cutlass/tensor_view.h | 228 ++- cutlass/tile_allocation.h | 143 ++ cutlass/tile_coord.h | 194 +++ cutlass/tile_iterator.h | 610 +++++-- cutlass/tile_stream.h | 378 +++++ cutlass/tile_traits_standard.h | 8 +- cutlass/util/complex.h | 457 +++++ cutlass/util/cutlass_math.h | 36 +- .../numeric_types.h} | 29 +- cutlass/util/platform.h | 10 +- cutlass/vector.h | 170 +- cutlass/wmma_matrix.h | 67 +- cutlass/zip_fragment.h | 150 ++ cutlass/zip_tensor_ref.h | 77 + cutlass/zip_tile_iterator.h | 287 ++++ examples/00_basic_gemm/CMakeLists.txt | 38 + examples/00_basic_gemm/basic_gemm.cu | 492 ++++++ examples/01_tensor_view/CMakeLists.txt | 38 + examples/01_tensor_view/tensor_view.cu | 424 +++++ examples/02_cutlass_utilities/CMakeLists.txt | 38 + .../02_cutlass_utilities/cutlass_utilities.cu | 359 ++++ .../03_strided_batched_gemm/CMakeLists.txt | 38 + .../strided_batched_gemm.cu | 349 ++++ examples/04_tile_iterator/CMakeLists.txt | 38 + examples/04_tile_iterator/tile_iterator.cu | 248 +++ examples/05_wmma_gemm/CMakeLists.txt | 38 + examples/05_wmma_gemm/wmma_gemm.cu | 353 ++++ examples/CMakeLists.txt | 28 + media/images/cutlass-threadblock-gemm.png | Bin 0 -> 60809 bytes media/images/cutlass-tile-iteration.png | Bin 0 -> 76377 bytes media/images/cutlass-tile-structure.png | Bin 0 -> 116377 bytes .../cutlass-warp-thread-tile-structure.png | Bin 0 -> 179689 bytes media/images/gemm-hierarchy-with-epilogue.png | Bin 256654 -> 258829 bytes media/images/gemm-structural-components.png | Bin 0 -> 245863 bytes tools/test/perf/CMakeLists.txt | 8 +- ...ass_perf_test.cpp => cutlass_perf_test.cu} | 60 +- tools/test/perf/cutlass_perf_test.h | 44 + 
tools/test/perf/gemm/bmma_gemm.cu | 121 ++ tools/test/perf/gemm/cublas_dispatch.h | 4 +- tools/test/perf/gemm/cutlass_dispatch.h | 33 +- tools/test/perf/gemm/dgemm.cu | 50 +- tools/test/perf/gemm/gemm_perf_testbed.h | 366 ++-- tools/test/perf/gemm/gemm_profiler.h | 192 ++- tools/test/perf/gemm/hgemm.cu | 78 +- tools/test/perf/gemm/igemm.cu | 92 +- tools/test/perf/gemm/sgemm.cu | 64 +- tools/test/perf/gemm/wmma_binary_gemm.cu | 149 ++ tools/test/perf/gemm/wmma_gemm.cu | 197 ++- tools/test/perf/gemm/wmma_integer_gemm.cu | 455 +++++ tools/test/perf/performance_result.h | 107 +- tools/test/perf/provider.h | 71 + tools/test/perf/testbench_configs.h | 189 +++ tools/test/perf/testbench_options.h | 413 +++-- tools/test/perf/testbench_output.h | 46 +- tools/test/unit/CMakeLists.txt | 25 +- tools/test/unit/core/layout_verification.cu | 4 +- tools/test/unit/core/layout_verification.h | 12 +- tools/test/unit/core/predicate_vector.cu | 129 +- tools/test/unit/core/tensor_ref.cu | 220 +++ tools/test/unit/core/tensor_view.cu | 235 +++ tools/test/unit/core/tile_iterator.cu | 258 +-- tools/test/unit/core/zip_tile_iterator.cu | 173 ++ tools/test/unit/cutlass_unit_test.cpp | 60 +- tools/test/unit/cutlass_unit_test.h | 1 + .../gemm/batched_strided_dgemm_128x128x8.cu | 103 ++ .../gemm/batched_strided_hgemm_128x128x8.cu | 112 ++ .../gemm/batched_strided_sgemm_128x128x8.cu | 135 ++ tools/test/unit/gemm/binary_gemm.h | 77 + tools/test/unit/gemm/dgemm.cu | 13 +- tools/test/unit/gemm/epilogue_functor.cu | 121 ++ .../unit/gemm/fp16_sgemm_fp16_128x128x16.cu | 321 ++++ .../unit/gemm/fp16_sgemm_fp32_128x128x16.cu | 174 ++ tools/test/unit/gemm/gemm_nvrtc.cu | 14 +- tools/test/unit/gemm/gemm_nvrtc.h | 16 +- .../test/unit/gemm/gemm_shared_mem_layouts.cu | 621 ------- tools/test/unit/gemm/gemm_testbed.h | 615 +++++-- tools/test/unit/gemm/hgemm_128x128x16.cu | 30 +- tools/test/unit/gemm/hgemm_128x128x8.cu | 12 +- tools/test/unit/gemm/hgemm_128x32x8.cu | 10 +- tools/test/unit/gemm/hgemm_128x64x8.cu | 
10 +- tools/test/unit/gemm/igemm_128x128x32.cu | 45 +- .../test/unit/gemm/igemm_128x128x32_float.cu | 46 +- tools/test/unit/gemm/igemm_128x128x32_int8.cu | 48 +- tools/test/unit/gemm/igemm_128x32x32.cu | 49 +- tools/test/unit/gemm/igemm_128x64x32.cu | 19 +- tools/test/unit/gemm/igemm_32x32x128.cu | 8 +- .../test/unit/gemm/{gemm.h => integer_gemm.h} | 95 +- tools/test/unit/gemm/run_gemm.h | 244 +++ tools/test/unit/gemm/sgemm_128x128x16.cu | 93 +- tools/test/unit/gemm/sgemm_128x128x8.cu | 19 +- tools/test/unit/gemm/sgemm_128x32x16.cu | 66 +- tools/test/unit/gemm/sgemm_128x32x8.cu | 10 +- tools/test/unit/gemm/sgemm_128x64x16.cu | 64 +- tools/test/unit/gemm/sgemm_128x64x8.cu | 12 +- tools/test/unit/gemm/sgemm_64x128x16.cu | 12 +- tools/test/unit/gemm/sgemm_64x128x8.cu | 10 +- tools/test/unit/gemm/sgemm_64x32x16.cu | 10 +- tools/test/unit/gemm/sgemm_64x32x8.cu | 10 +- tools/test/unit/gemm/sgemm_64x64x16.cu | 10 +- tools/test/unit/gemm/sgemm_64x64x8.cu | 10 +- .../unit/gemm/sgemm_threadblock_swizzle_nn.cu | 1481 +++++++++++++++++ .../unit/gemm/sgemm_threadblock_swizzle_nt.cu | 1481 +++++++++++++++++ .../unit/gemm/sgemm_threadblock_swizzle_tn.cu | 1481 +++++++++++++++++ .../unit/gemm/sgemm_threadblock_swizzle_tt.cu | 1481 +++++++++++++++++ .../unit/gemm/warp_multiply_add_nvcuda.cu | 276 +++ tools/test/unit/gemm/wmma_binary_gemm.cu | 236 +++ tools/test/unit/gemm/wmma_gemm.cu | 153 +- tools/test/unit/gemm/wmma_gemm_epilogue.cu | 446 +++++ .../unit/gemm/wmma_gemm_fragment_stream.cu | 504 ++++++ .../test/unit/gemm/wmma_gemm_multiply_add.cu | 629 +++++++ tools/test/unit/gemm/wmma_integer_gemm.cu | 630 +++++++ tools/test/unit/util/complex.cu | 102 ++ tools/test/unit/util/host_tensor.cu | 384 ++++- tools/test/unit/util/tensor_elementwise.cu | 324 ++++ tools/test/unit/util/tensor_foreach.cu | 217 +++ tools/test/unit/util/unique_ptr.cu | 25 + tools/util/command_line.h | 73 +- tools/util/device_memory.h | 27 +- tools/util/distribution.h | 138 ++ tools/util/exceptions.h | 2 +- 
tools/util/half.h | 57 +- tools/util/host_matrix.h | 264 +++ tools/util/host_matrix_view.h | 205 +++ tools/util/host_tensor.h | 407 ++--- tools/util/host_tensor_view.h | 445 +++-- .../device/kernel/tensor_elementwise.h | 162 ++ .../reference/device/kernel/tensor_foreach.h | 112 ++ .../reference/device/tensor_elementwise.h | 772 +++++++++ tools/util/reference/device/tensor_foreach.h | 72 + tools/util/reference/host/gemm.h | 270 +++ .../util/reference/host/tensor_elementwise.h | 478 ++++++ tools/util/reference/host/tensor_foreach.h | 102 ++ tools/util/tensor_view_io.h | 151 +- tools/util/type_traits.h | 111 +- 193 files changed, 29496 insertions(+), 4771 deletions(-) rename changelog.md => CHANGELOG.md (77%) create mode 100644 CUTLASS.md delete mode 100755 clang-format.sh delete mode 100644 cutlass/fragment_load_store.h create mode 100644 cutlass/gemm/fp16_sgemm_multiply_add.h create mode 100644 cutlass/gemm/fp16_sgemm_traits.h create mode 100644 cutlass/gemm/gemm_config.h create mode 100644 cutlass/gemm/gemm_coord.h create mode 100644 cutlass/gemm/gemm_desc.h create mode 100644 cutlass/gemm/gemm_stream_pair.h create mode 100644 cutlass/gemm/linear_scaling_device_ptr.h create mode 100644 cutlass/gemm/scalar_or_pointer.h create mode 100644 cutlass/gemm/threadblock_swizzle.h create mode 100644 cutlass/kernel_launch.h create mode 100644 cutlass/tensor_ref_collection.h create mode 100644 cutlass/tile_allocation.h create mode 100644 cutlass/tile_coord.h create mode 100644 cutlass/tile_stream.h create mode 100644 cutlass/util/complex.h rename cutlass/{gemm/identity_block_swizzle.h => util/numeric_types.h} (79%) create mode 100644 cutlass/zip_fragment.h create mode 100644 cutlass/zip_tensor_ref.h create mode 100644 cutlass/zip_tile_iterator.h create mode 100644 examples/00_basic_gemm/CMakeLists.txt create mode 100644 examples/00_basic_gemm/basic_gemm.cu create mode 100644 examples/01_tensor_view/CMakeLists.txt create mode 100644 examples/01_tensor_view/tensor_view.cu create 
mode 100644 examples/02_cutlass_utilities/CMakeLists.txt create mode 100644 examples/02_cutlass_utilities/cutlass_utilities.cu create mode 100644 examples/03_strided_batched_gemm/CMakeLists.txt create mode 100644 examples/03_strided_batched_gemm/strided_batched_gemm.cu create mode 100644 examples/04_tile_iterator/CMakeLists.txt create mode 100644 examples/04_tile_iterator/tile_iterator.cu create mode 100644 examples/05_wmma_gemm/CMakeLists.txt create mode 100644 examples/05_wmma_gemm/wmma_gemm.cu create mode 100644 examples/CMakeLists.txt create mode 100644 media/images/cutlass-threadblock-gemm.png create mode 100644 media/images/cutlass-tile-iteration.png create mode 100644 media/images/cutlass-tile-structure.png create mode 100644 media/images/cutlass-warp-thread-tile-structure.png create mode 100644 media/images/gemm-structural-components.png rename tools/test/perf/{cutlass_perf_test.cpp => cutlass_perf_test.cu} (60%) create mode 100644 tools/test/perf/cutlass_perf_test.h create mode 100644 tools/test/perf/gemm/bmma_gemm.cu create mode 100644 tools/test/perf/gemm/wmma_binary_gemm.cu create mode 100644 tools/test/perf/gemm/wmma_integer_gemm.cu create mode 100644 tools/test/perf/provider.h create mode 100644 tools/test/perf/testbench_configs.h create mode 100644 tools/test/unit/core/tensor_ref.cu create mode 100644 tools/test/unit/core/tensor_view.cu create mode 100644 tools/test/unit/core/zip_tile_iterator.cu create mode 100644 tools/test/unit/gemm/batched_strided_dgemm_128x128x8.cu create mode 100644 tools/test/unit/gemm/batched_strided_hgemm_128x128x8.cu create mode 100644 tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu create mode 100644 tools/test/unit/gemm/binary_gemm.h create mode 100644 tools/test/unit/gemm/epilogue_functor.cu create mode 100644 tools/test/unit/gemm/fp16_sgemm_fp16_128x128x16.cu create mode 100644 tools/test/unit/gemm/fp16_sgemm_fp32_128x128x16.cu delete mode 100644 tools/test/unit/gemm/gemm_shared_mem_layouts.cu rename 
tools/test/unit/gemm/{gemm.h => integer_gemm.h} (53%) create mode 100644 tools/test/unit/gemm/run_gemm.h create mode 100644 tools/test/unit/gemm/sgemm_threadblock_swizzle_nn.cu create mode 100644 tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu create mode 100644 tools/test/unit/gemm/sgemm_threadblock_swizzle_tn.cu create mode 100644 tools/test/unit/gemm/sgemm_threadblock_swizzle_tt.cu create mode 100644 tools/test/unit/gemm/warp_multiply_add_nvcuda.cu create mode 100644 tools/test/unit/gemm/wmma_binary_gemm.cu create mode 100644 tools/test/unit/gemm/wmma_gemm_epilogue.cu create mode 100644 tools/test/unit/gemm/wmma_gemm_fragment_stream.cu create mode 100644 tools/test/unit/gemm/wmma_gemm_multiply_add.cu create mode 100644 tools/test/unit/gemm/wmma_integer_gemm.cu create mode 100644 tools/test/unit/util/complex.cu create mode 100644 tools/test/unit/util/tensor_elementwise.cu create mode 100644 tools/test/unit/util/tensor_foreach.cu create mode 100644 tools/test/unit/util/unique_ptr.cu create mode 100644 tools/util/distribution.h create mode 100644 tools/util/host_matrix.h create mode 100644 tools/util/host_matrix_view.h create mode 100644 tools/util/reference/device/kernel/tensor_elementwise.h create mode 100644 tools/util/reference/device/kernel/tensor_foreach.h create mode 100644 tools/util/reference/device/tensor_elementwise.h create mode 100644 tools/util/reference/device/tensor_foreach.h create mode 100644 tools/util/reference/host/gemm.h create mode 100644 tools/util/reference/host/tensor_elementwise.h create mode 100644 tools/util/reference/host/tensor_foreach.h diff --git a/changelog.md b/CHANGELOG.md similarity index 77% rename from changelog.md rename to CHANGELOG.md index d9ff1d5dd..73c2f7689 100644 --- a/changelog.md +++ b/CHANGELOG.md @@ -1,6 +1,22 @@ # NVIDIA CUTLASS Changelog -## [1.0.1](https://github.com/NVIDIA/cutlass/releases/tag/v1.0.1) (2018-06-11) + +## 1.1.0 (2018-09-19) + * Turing Features + * WMMA GEMM targeting TensorCores - INT8, 
INT4, INT1 + * Batched Strided GEMM + * Threadblock rasterization strategies + * Improved performance for adverse problem sizes and data layouts + * Extended CUTLASS Core comonents + * Tensor views support arbitrary matrix and tensor layouts + * Zip iterators for structuring multiple data streams + * Enhanced CUTLASS utilities + * Reference code for tensor operations in host and device code + * Added HostMatrix<> for simplified matrix creation + * Examples + * Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM + +## 1.0.1 (2018-06-11) * Intra-threadblock reduction added for small threadblock tile sizes * sgemm_64x128x16, sgemm_128x128x16, sgemm_128x64x16, sgemm_128x32x16, sgemm_64x64x16, sgemm_64x32x16 diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a53fae55..fdd51ae88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,11 +55,21 @@ endif() find_package(CUDA) find_package(Doxygen QUIET) +################################################################################################### +# +# Configure CMake variables +# +################################################################################################### + +find_library(CUBLAS_LIBRARY cublas HINTS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64) + # By default we want to build in Release mode to ensure that we're getting best performance if (NOT (CMAKE_BUILD_TYPE OR CONFIGURATION_TYPES)) set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose build level" FORCE) # We do support Debug or Release builds - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release") + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "RelWithDebInfo" "Release") endif() if(WIN32) @@ -68,27 +78,59 @@ if(WIN32) endif() if (WIN32) - # Enable more warnings and treat as errors - string(APPEND NVCC_FLAGS " -Xcompiler /W3 -Xcompiler /WX") + # Enable more warnings and treat as errors + string(APPEND NVCC_FLAGS " -Xcompiler /W3 -Xcompiler /WX") - # Disable excess x86 
floating point precision that can lead to results being labeled incorrectly - string(APPEND NVCC_FLAGS " -Xcompiler /fp:strict") + # Disable warning on Unicode characters + string(APPEND NVCC_FLAGS " -Xcompiler /wd4819") - # Verbose option - if (${CUTLASS_NVCC_VERBOSE}) - string(APPEND NVCC_FLAGS " -v") - endif() + # Disable excess x86 floating point precision that can lead to results being labeled incorrectly + string(APPEND NVCC_FLAGS " -Xcompiler /fp:strict") + + # Verbose option + if (${CUTLASS_NVCC_VERBOSE}) + string(APPEND NVCC_FLAGS " -v") + endif() endif(WIN32) -# Configure CUDA options -set(CUTLASS_NVCC_ARCHS "50;60;61;70" CACHE STRING "The SM architectures to build code for.") -set(CUTLASS_NVCC_KEEP OFF CACHE BOOL "Keep intermediate files generated by NVCC.") +set(CUTLASS_NVCC_ARCHS "50;60;61;70;75" CACHE STRING "The SM architectures to build code for.") +set(CUTLASS_NVCC_EMBED_CUBIN ON CACHE BOOL "Embed compiled CUDA kernel binaries into executables.") +set(CUTLASS_NVCC_EMBED_PTX ON CACHE BOOL "Embed compiled PTX into executables.") +set(CUTLASS_NVCC_KEEP OFF CACHE BOOL "Keep intermediate files generated by NVCC.") +# +# NOTE: running with asan and CUDA requires the following environment variable: +# +# ASAN_OPTIONS=protect_shadow_gap=0:replace_intrin=0:detect_leaks=0 +# +# without the above environment setting, an error like the following may be generated: +# +# *** Error: Could not detect active GPU device ID [out of memory] +# ... +# ==9149==ERROR: LeakSanitizer: detected memory leaks +# ... 
+# +if(ENABLE_ASAN) # https://github.com/google/sanitizers/wiki/AddressSanitizer + string(APPEND NVCC_FLAGS " --compiler-options -fsanitize=address --compiler-options -fno-omit-frame-pointer") + string(APPEND CMAKE_EXE_LINKER_FLAGS " -fsanitize=address") +endif() + +################################################################################################### +# +# Configure CUDA build options +# +################################################################################################### + +# Set NVCC arguments foreach(ARCH ${CUTLASS_NVCC_ARCHS}) - string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") + if(CUTLASS_NVCC_EMBED_CUBIN) + string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") + endif() + if(CUTLASS_NVCC_EMBED_PTX) + string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=compute_${ARCH}") + endif() endforeach() - if (CUTLASS_NVCC_KEEP) string(APPEND NVCC_FLAGS " -keep") endif() @@ -99,11 +141,8 @@ else() string(APPEND NVCC_FLAGS " -lineinfo") endif() -if (UNIX) - string(APPEND NVCC_FLAGS " -Xcompiler -Wconversion") -endif() - string(APPEND NVCC_FLAGS_DEBUG " -g") +string(APPEND NVCC_FLAGS_RELWITHDEBINFO " -O3") string(APPEND NVCC_FLAGS_RELEASE " -O3") # define NDEBUG for release mode to disable assertions @@ -111,11 +150,13 @@ string(APPEND NVCC_FLAGS_RELEASE " -DNDEBUG") if (CUTLASS_NATIVE_CUDA) set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS}") - set(CMAKE_CUDA_FLAGS_DEBUG "${NVCC_FLAGS_DEBUG}") set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_RELEASE}") + set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "${NVCC_FLAGS_RELWITHDEBINFO}") + set(CMAKE_CUDA_FLAGS_DEBUG "${NVCC_FLAGS_DEBUG}") else() set(CUDA_NVCC_FLAGS ${NVCC_FLAGS}) set(CUDA_NVCC_FLAGS_DEBUG ${NVCC_FLAGS_DEBUG}) + set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ${NVCC_FLAGS_RELWITHDEBINFO}) set(CUDA_NVCC_FLAGS_RELEASE ${NVCC_FLAGS_RELEASE}) endif() @@ -128,6 +169,11 @@ file(GLOB CUTLASS_GEMM RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/gemm/*.h) file(GLOB CUTLASS_UTIL 
RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/util/*.h) file(GLOB CUTLASS_DEVICE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/device/*.h) file(GLOB CUTLASS_CORE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/*.h) +################################################################################################### +# +# Define build targets +# +################################################################################################### source_group("cutlass\\gemm" FILES ${CUTLASS_GEMM}) source_group("cutlass\\util" FILES ${CUTLASS_UTIL}) @@ -156,9 +202,9 @@ add_custom_target(cutlass_ide SOURCES if (DOXYGEN_FOUND) # DOT is available. Enable graph generation in the documentation if (DOXYGEN_DOT_EXECUTABLE) - set(CUTLASS_ENABLE_DOXYGEN_DOT ON CACHE BOOL "Use dot to generate graphs in the doxygen documentation.") + set(CUTLASS_ENABLE_DOXYGEN_DOT ON CACHE BOOL "Use dot to generate graphs in the doxygen documentation.") else() - set(CUTLASS_ENABLE_DOXYGEN_DOT OFF CACHE BOOL "Use dot to generate graphs in the doxygen documentation." FORCE) + set(CUTLASS_ENABLE_DOXYGEN_DOT OFF CACHE BOOL "Use dot to generate graphs in the doxygen documentation." FORCE) endif() if (CUTLASS_ENABLE_DOXYGEN_DOT) @@ -177,6 +223,5 @@ if (DOXYGEN_FOUND) ) endif() - -#add_subdirectory(examples/gemm) add_subdirectory(tools) +add_subdirectory(examples) diff --git a/CUTLASS.md b/CUTLASS.md new file mode 100644 index 000000000..7dea0f372 --- /dev/null +++ b/CUTLASS.md @@ -0,0 +1,311 @@ +![ALT](/media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition") + +# CUTLASS + +This document is intended to accompany the CUTLASS source code, to describe the interaction between +CUTLASS core components, and to identify their role in implementing GEMM computations efficiently in CUDA. + +1. [Design Patterns](#S-design-patterns) +2. [General Matrix Multiply](#S-general-matrix-multiply) +3. [Core Components](#S-core-components) +4. [Utilities](#S-utilities) + +# 1. 
Design Patterns + +CUTLASS strives to achieve the highest performance possible on NVIDIA GPUs while also offering a +flexible composition that an be easily applied to solve new problems related to Deep Learning and +linear algebra. Though we intend to make CUTLASS as simple and straightforward as possible, given +a tradeoff between simplicity and performance, CUTLASS chooses performance. Consequently, several +design patterns are necessary to yield a composable structure while also satisfying these performance +objectives. This section is intended to provide more detail. + +* [Sequencing and Nesting](#S-patterns-sequencing-nesting) +* [Tiles and Iterators](#S-patterns-tiles-iterators) +* [Host-side Params](#S-patterns-host-side-params) +* [Composable Shared Memory](#S-patterns-composable-shared-memory) + +## Sequencing and Nesting of Collective Primitives + +CUTLASS embodies a design paradigm exemplified by the [CUB library](https://nvlabs.github.io/cub/) for expressing collective operations. Objects expose an interface for a problem that is then decomposed into concurrent subtasks executed by cooperating threadblocks, warps, and threads. For example, a grid-level object may be constructed with base pointers to the start of a GEMM operation, add a threadblock-dependent offset to partition the problem, and then compute a per-threadblock GEMM. This in turn performs some operations as a collection of cooperating threads, while it may partition other parts of the task into warp-level subtasks. + +## Tiles and Iterators + +Efficient dense linear algebra computations emphasize data movement to match the execution of mathemtical operators to the flow of data. Consequently, CUTLASS defines a rich set of primitives for partitioning a tile of data among participating threads, warps, and threadblocks. CUTLASS applies the familiar iterator design pattern to provide an abstraction layer to (1.) access these tile objects and (2.) 
traverse a sequence of objects embedded in a higher level data structure. These subpartitions are typically defined by compile-time constants +specifying element type, size, and data layout. CUTLASS refers to subpartitions as _tiles_. + +_Iterators_ are familiar design patterns in C++ that provide an abstraction for accessing individual +elements in memory as well as traversing over a collection. GEMM kernels in CUTLASS depend on accessing +a sequence of tiles from global memory, from shared memory, and in registers. Consequently, _tile iterators_ +are prevalent throughout the CUTLASS implementation. + +The canonical CUTLASS tile iterator template is defined in [cutlass/tile_iterator.h](cutlass/tile_iterator.h). + +## Host-side Params structure + +Several CUTLASS template classes exhibit a pattern in which problem-specific internal state is known at kernel launch time and remains invariant throughout the execution of a kernel. For example, tile iterators compute several offsets based on the strides of the input tensor that is added to an internal pointer when loading the elements of a tile. These are computed from the tensor stride and never updated; the per-thread internal state consists only of the internal global memory pointer. + +CUTLASS can take advantage of this CUDA grid-invariant property by constructing the object in host code and passing a composed parameters structure to the kernel. This confers two benefits: (1.) invariant state is held in constant memory, and (2.) there is no overhead to compute the initial state by each thread. + +The design pattern in CUTLASS is for classes with nontrivial constructors to define `struct Params` as an inner class which contains grid-invariant state. These should define a constructor and an `initialize()` method. The `Params` structure should also include a data member corresponding to each data member in the parent class, so these too can be properly constructed in host code. 
The parent class should define a constructor which accepts `Params const &` as its first argument. + +For example, `cutlass::gemm::Gemm<>` should define `struct cutlass::gemm::Gemm::Params`. The latter should define data members for each data member in `cutlass::gemm::Gemm<>`. + + +## Composable shared memory allocation + +Shared memory requires explicit effort by the programmer to allocate and de-allocate. CUTLASS follows the paradigm introduced by [CUB](https://nvlabs.github.io/cub/) to define composed structures for storing data intended to be held in shared memory. Any object requiring shared memory storage for itself or its data members should define a child structure called SharedStorage. This holds data needed by the class and also instantiates SharedStorage objects for each data member. + +To be consistent, this pattern defines a convention in which classes define internal shared memory storage requirements. Classes should consider all SharedStorage structures to be opaque other than their own child class. When the lifetimes of child objects are known to be non-overlapping, unions may be used to alias multiple SharedStorage objects to the same shared memory region and reduce overall SMEM capacity. + +## Loop Unrolling + +CUTLASS requires tiles of data to be stored in registers for high-bandwidth access. Simultaneously, high-throughput math instructions +must be issued concurrently with memory instructions to hide latency with relatively few concurrent threads. These objectives are +achieved by unrolling loops whose iteration counts are known at compile time. + +Consequently, most loops within the CUTLASS GEMM implementation are specified by constant values and template arguments. The CUDA compiler +is able to unroll the loop bodies, map array elements to registers, and construct an efficient instruction schedule. + +## Templates + +CUDA C++ templates and modern generic programming techniques enable CUTLASS device code to span a large design space. 
+ +This design space includes: +* Mixed precision arithmetic and data storage +* Kernels specialized for layout and problem size +* Support for kernel fusion + +Moreover, templates provided a structured approach to collecting compile-time constants such as tile dimensions. These +must be template arguments to target static array allocation and take advantage of loop unrolling, constant folding, +and function inlining. + +# 2. General Matrix Multiply + +The following figure illustrates the hierarchical GEMM computation embodied by CUTLASS. Each stage depicts a nested level of tiling which corresponds to a layer of concurrency within the CUDA execution model and to a level within the memory hierarchy, becoming increasingly finer moving left to right. + +![ALT](/media/images/gemm-structural-components.png "CUTLASS GEMM Structural Components") + +## Threadblock-level GEMM + +The CUTLASS GEMM kernel partitions the _C_ matrix into a 2D tiling of threadblocks. +Each threadblock computes a matrix product whose outer dimensions _M_ and _N_ are compile-time constants. The +GEMM's _K_ dimension is partitioned into tiles and iterated over by the GEMM _mainloop_. The shape of the matrix +multiply operation performed by each iteration of the mainloop is referred to as _OutputTile_. + +The threadblock loads a sequence of tiles from global memory and stores this data to shared memory. The iterative +access and traversal of tiles in global memory are performed by a _TileLoadIterator_, and storing to a circular +buffer in shared memory is performed by a _GlobalLoadIterator_. + +**[Global Load Stream](cutlass/gemm/gemm_global_stream.h)** manages loading of the threadblock-scope multiplicands to the GEMM kernel. It owns an iterator into global memory for loading tiles of data, a TensorAllocation in shared memory to hold the resulting tile, and an iterator for writing the tile into this allocation. 
A transformer exists to optionally transform the data as it is loaded which may of use to perform type conversion or, in the case of int8 GEMM, transpose 4x4 tiles held in registers. + +The Global Load Stream template contains members defined by the following templates: + +* [GemmGlobalIteratorAb](cutlass/gemm/gemm_global_tile.h) +* [Transformer](cutlass/convert.h) +* [GemmSharedStoreTileAb](cutlass/gemm/gemm_shared_tile.h) + +## Warp-level GEMM + +The threadblock's _OutputTile_ is partitioned among the warps, and each computes a warp-level matrix product. +Data is loaded from shared memory into registers, and math instructions are dispatched to CUDA Cores or Tensor Cores. + +[**Shared Load Stream**](cutlass/gemm/gemm_shared_stream.h) manages loading of warp-level multiplicands from shared memory into registers. This owns an iterator for fetching data and the destination fragments for holding the results. + +* [GemmSharedLoadTile{A,B}](cutlass/gemm/gemm_shared_tile.h) + +**Matrix Multiply** computes a matrix product operation on data held in registers. Specializations exist for thread-level instructions such as single-precision fused multiply-add as well as warp-level matrix operations targeting TensorCores. + +* [WMMA Multiply Add](cutlass/gemm/wmma_gemm_multiply_add.h) + +## Thread-level GEMM + +SGEMM, IGEMM, HGEMM, and DGEMM are computed by SIMT math instructions issued by thread-level matrix multiply +procedures. + +* [ThreadMultiplyAdd](cutlass/gemm/thread_multiply_add.h) +* [IGEMM specialization](cutlass/gemm/igemm_multiply_add.h) +* [HGEMM specialization](cutlass/gemm/hgemm_multiply_add.h) + +## Epilogue + +The [**epilogue**](cutlass/gemm/gemm_epilogue.h) iteratively selects a subset of accumulator elements held by a warp, writes them to shared memory, and loads them by different threads such that a threadblock-scoped tile store operation will make contiguous, striped accesses to global memory. Thus, the flow of data utilizes the following components: + +1. 
[Transformer](cutlass/convert.h) for converting the data types of accumulator elements +2. [GemmSharedStoreTileD](cutlass/gemm/gemm_shared_tile.h) to store to shared memory specialized to the accumulator layout. +3. [GemmSharedLoadTileD](cutlass/gemm/gemm_shared_tile.h) to load the data from shared memory. +4. [GemmGlobalIteratorC](cutlass/gemm/gemm_global_tile.h) to load a tile from global memory. +5. A [functor](cutlass/gemm/linear_scaling.h) to compute an element-wise operation on the matrix product and source data (such as alpha*AB+beta*C). +6. [GemmGlobalIteratorD](cutlass/gemm/gemm_global_tile.h) to write the output to global memory. + +## GEMM Traits + +[**cutlass::gemm::GemmTraits**](cutlass/gemm/gemm_traits.h) collects the structural properties of a complete GEMM computation into a single template class. As a result, the Traits classes encapsulate the the iterators and transformers for all supported GEMM operands and layouts. Low-level details needed by Traits (such as scalar types for operands, thread-block tile size, number of scalar elements per memory access within each phase, number of stages in shared memory, as well as other implementation-specific properties of the GEMM computation) are specified in class [**cutlass::gemm::GemmConfig**](cutlass/gemm/gemm_config.h). + + +# 3. Core Components + +CUTLASS GEMM kernels are implemented by a set of Core components for interacting with mathematical tensor and matrix +objects as well as constructing efficient CUDA kernels. + +* [Tensor views](#S-core-tensor-views) +* [Shape](#S-core-shape) +* [Tile structure](#S-core-tile-structure) +* [Fragment](#S-core-fragment) +* [Predicate vector](#S-core-predicate-vector) + +## Tensor View + +Matrices and tensors are typically represented as n-D arrays held in linear memory with a single base pointer and a stride vector. Element _i_ of the stride vector indicates the offset in linear memory between consecutive elements in dimension i. 
Consequently, the linear offset for an arbitrary element specified as an n-tuple may be computed as the dot product of the coordinate and the stride vector. + +CUTLASS provides abstractions for interacting with multidimensional tensors in device memory. +Consequently, we define a hierarchy of pointer-like types for referencing tensors. + +`T *` - raw pointer to elements of type T + +`cutlass::TensorRef` - reference to a tensor of elements of type T and given rank. Includes a mapping function and associated stride vector for accessing elements in linear memory. + +`cutlass::TensorView` - extends `TensorRef<>` by adding bounds information. This is a complete mathematical object which may be used as the argument to CUTLASS functions. + +The above provide an identity mapping of a logical index space to linear memory. An element +at logical coordinate X has an offset computed as follows: +``` +offset = dot(X, stride) +``` +where `dot()` computes the inner product of X and a vector of "strides." + +CUTLASS 1.1 introduces a mapping function and an additional "storage rank" to offer a flexible way to +map the logical index space of the tensor to memory. The mapping function maps a coordinate +of rank _R_ to an index space of rank _S_. The linear offset is computed as: +``` +offset = dot( MapFunc(X), stride ) +``` +where stride is a vector of rank _S_. + +CUTLASS kernels make extensive use of vectorization of memory accesses for efficiency and +correctness. Consequently, we enforce a constraint on the strides used by mapping functions +such that: + +1. The "fastest-changing" stride is always 1 thereby mandating that consecutive elements in + that rank are consecutive in linear memory. + +2. The fastest changing rank is always last in the stride vector and not explicitly stored. + +Thus, the stride vector used by mapping functions has length of one fewer than the rank of the +storage tensor. 
These constraints are consistent with the BLAS interface of passing matrices as +a tuple consisting of a pointer and a "leading dimension." In fact, these are rank=2 tensors +whose fastest changing dimension is 1, and only the strided dimension is explicitly represented. + +A typical mapping function might simply map the rows and columns of a matrix, a rank=2 tensor, +to linear memory such that (1.) elements in the same column are consecutive in memory +(column-major), or (2.) elements in the same row are consecutive (row-major). These can be +accomplished by two different mapping functions whose stride vector is length=2. The first +element is the "leading dimension." + +The requirement that the fastest-changing stride always be of unit size need not be a limitation. +To implement "sparse" computations or matrix operations in which matrix elements have arbitrary +stride along both row and column, define a mapping function whose storage rank is 3. This permits +two elements of the stride vector to have a non-unit value. + +`cutlass::TensorView<>` extends this concept by including a size vector to specify the bounds of +the index space. The value of each coordinate in the size vector defines the half-open range of +indices whose smallest value is zero. + +## Shape + +To avoid complicated template metaprogramming, CUTLASS targets fixed compile-time tile sizes specified +by a four-dimensional template `cutlass::Shape<>`. This defines the following dimensions, mirroring +the NHWC tensor format used for convolution in Deep Learning frameworks. + +- `D`: depth of tensor +- `H`: first strided dimension +- `W`: contiguous sequence of tensor elements +- `C`: number of channels, usually used for vectorized access + +Template specializations of `Shape` appear as arguments to numerous dependent template classes which +must specify compile-time constant tile sizes. 
+ +## Tile Structure + +Tiled structures express an arrangement of data in memory as well as a logical mapping of concurrent CUDA +threads to the problem space. For example, the CUTLASS GEMM + +Tiled structures can be defined using the `cutlass::TileTraits<>` concept which defines the following +members. Collectively, these members offer a flexible way to define a 4-D subpartition of an integer +lattice, partition its elements among a collection of threads, and map each unique thread ID to a unique +offset. + +- _Tile_ (concept `Shape<>`) - describes the dimensions of the tile in terms of scalar elements +- _Delta_ (concept `Shape<>`) - describes the distance along each logical dimension between items +- _Iterations_ (concept `Shape<>`) - describes the number of items along each logical dimension +- _ThreadOffset_ (concept _functor_) - implements `Coord<4> operator()() const` to determine a thread's + initial offset in the logical 4-D coordinate space + +The following figure illustrates the CUTLASS tile structure. The overall shape, 16-by-16, is partitioned into +vectors of length two among 32 threads. The elements stored by thread 9 are highlighted. 
+ +CUTLASS tile structure + +The `cutlass::TileTraits<>` definition that describes this arrangement may be defined as follows: + +``` +struct ExampleTileTraits { + + /// Overall shape of tile + typedef Shape<1, 16, 16, 1> Tile; + + /// Distance along each dimension of accesses + typedef Shape<1, 4, 1, 1> Delta; + + /// Number of memory accesses performed by each thread + typedef Shape<1, 4, 1, 1> Iterations; + + /// Offset function - maps each thread to a unique starting offset within the 4D tile + struct ThreadOffset { + + CUTLASS_DEVICE Coord<4> operator()() const { + + typedef Shape<1, 16, 8, 2> Vectorized; + + return make_Coord( + 0, // depth "D" dimension + threadIdx.x / Vectorized::kW, // horizontal "H" dimension - first strided dimension + threadIdx.x % Vectorized::kW, // vertical "W" dimension - contiguous dimension + 0 + ); + } + }; +}; +``` + +## Tile Iterator + +The iterator design pattern provides an abstraction for accessing the items in a collection in sequence. Basic +operators defined by iterators consist of accessing an item - either a load or store - followed by traversal to +the next item in sequence. + +CUTLASS tile access and traversal + +To offer a generic solution that spans numerous data types and layouts, CUTLASS defines the _TileIterator_ concept. +This concept provides access to a sequence of _tiles_ embedded in a tensor in addressable memory. + +The canonical CUTLASS tile iterator template is defined in [cutlass/tile_iterator.h](cutlass/tile_iterator.h). + +## Fragment + +A fragment is analogous to `std::array<>` in that it is a constant-sized array of elements. Typically backed by storage in the SM's register file, CUTLASS `Fragment<>` objects are used to store tiles. For threadblock- and warp-scope operations, the contents of these tiles are distributed across the participating threads. In such cases, a thread's `Fragment<>` contains the part of the tile held by that thread. 
+ +## Predicate Vector + +SIMT architectures utilize predicated execution in place of control flow when conditional code sequences are fairly short, on the order of a few machine instructions. While CUDA C++ does not include constructs at the language level for predication, PTX makes this explicit, and compilation to SASS is assumed to aggressively utilize predication. Typical applications are to initialize a sequence of bits used to mask memory operations and use these bits as predicates guarding memory load and store instructions. + +CUTLASS provides `PredicateVector` defined in [cutlass/predicate_vector.h](cutlass/predicate_vector.h) to manage a statically-sized bit vector, store them into general purpose registers, and efficiently access them in sequence. By storing four predicates per byte in hardware registers, the CUDA compiler is able to issue specialized instructions to achieve very efficient unpacking. + + +# 4. Utilities + +CUTLASS implements efficient matrix multiply computations on GPUs. It is accompanied by an extensive utility +framework offering features such as: + +* [cutlass::half_t](tools/util/half.h) - a host-side half-precision type +* Components for allocating and initializing [host-side and device-side tensors](tools/util/host_tensor.h) usable by CUTLASS +* Reference implementations of [GEMM](tools/util/reference/host/gemm.h) and [element-wise operations](tools/util/reference/host/tensor_elementwise.h) diff --git a/Doxyfile b/Doxyfile index 51cec529b..1d96f3770 100644 --- a/Doxyfile +++ b/Doxyfile @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. 
-OUTPUT_DIRECTORY = docs +OUTPUT_DIRECTORY = doxygen # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and diff --git a/README.md b/README.md index 56473a286..d5bd15ef7 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ ![ALT](/media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition") -# CUTLASS 1.0 +# CUTLASS 1.1 -_CUTLASS 1.0.1 - June 2018_ +_CUTLASS 1.1.0 - September 2018_ -CUTLASS 1.0 is a collection of CUDA C++ template abstractions for implementing +CUTLASS 1.1 is a collection of CUDA C++ template abstractions for implementing high-performance matrix-multiplication (GEMM) at all levels and scales within CUDA. It incorporates strategies for hierarchical decomposition and data movement similar to those used to implement cuBLAS. CUTLASS decomposes these "moving parts" into @@ -22,14 +22,27 @@ point (FP64) types. Furthermore, CUTLASS demonstrates CUDA's WMMA API for targe the programmable, high-throughput _Tensor Cores_ provided by NVIDIA's Volta architecture and beyond. -CUTLASS 1.0 has changed substantially from our preview release described in -the [CUTLASS Parallel For All](https://devblogs.nvidia.com/parallelforall/cutlass-linear-algebra-cuda) -post. We have decomposed the structure of the GEMM computation into deeper, structured -primitives for loading data, computing predicate masks, streaming data at each level of -the GEMM hierarchy, and updating the output matrix. +CUTLASS 1.1 is described in the [CUTLASS Documentation](CUTLASS.md) and the accompanying +[Doxygen documentation](https://nvidia.github.io/cutlass). +We describe the structure of an efficient GEMM in our talk at the +[GPU Technology Conference 2018](http://on-demand.gputechconf.com/gtc/2018/presentation/s8854-cutlass-software-primitives-for-dense-linear-algebra-at-all-levels-and-scales-within-cuda.pdf). 
-CUTLASS 1.0 is described in the [Doxygen documentation](https://nvidia.github.io/cutlass) -and our talk at the [GPU Technology Conference 2018](http://on-demand.gputechconf.com/gtc/2018/presentation/s8854-cutlass-software-primitives-for-dense-linear-algebra-at-all-levels-and-scales-within-cuda.pdf). +# What's New in CUTLASS 1.1 + +* [CUTLASS Documentation](CUTLASS.md) +* [Examples](examples/) + * Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM +* Turing Features + * [WMMA GEMM targeting TensorCores](tools/test/unit/gemm/wmma_integer_gemm.cu) - INT8, INT4, INT1 +* [Batched Strided GEMM](tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu) +* [Threadblock rasterization strategies](tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu) + * Improved performance for adverse problem sizes and data layouts +* Extended CUTLASS Core components + * Tensor views support arbitrary matrix and tensor layouts + * Zip iterators for structuring multiple data streams +* Enhanced CUTLASS utilities + * [Reference implementations](tools/util/reference) for tensor operations in [host](tools/util/reference/host) and [device](tools/util/reference/device) code + * Added `HostMatrix<>` for simplified matrix creation # Performance @@ -39,11 +52,11 @@ CUTLASS primitives are very efficient. When used to construct device-wide GEMM they exhibit performance comparable to cuBLAS for scalar GEMM computations. The above figure shows CUTLASS performance relative to cuBLAS for large matrix dimensions (M=10240, N=K=4096) running on an NVIDIA Titan V GPU -when compiled with CUDA 9.2. +when compiled with CUDA 10.0. # Compatibility -CUTLASS requires CUDA 9 and performs best with [CUDA 9.2 Toolkit](ttps://developer.nvidia.com/cuda-toolkit) or later. +CUTLASS requires CUDA 9 but performs best with [CUDA 10.0 Toolkit](https://developer.nvidia.com/cuda-toolkit) or later. 
|**Operating System** | **Compiler** | |-----------------|----------| @@ -63,7 +76,7 @@ any Maxwell-, Pascal-, or Volta-architecture NVIDIA GPU. |NVIDIA Tesla P100| |NVIDIA Tesla V100| |NVIDIA TitanV| - +|NVIDIA GeForce RTX 2080 TI, 2080, 2070| # Building CUTLASS @@ -79,7 +92,7 @@ $ git submodule update --init --recursive ``` CUTLASS can be build with CMake starting version 3.10. By default CUTLASS will build kernels -for CUDA architecture versions 5.0, 6.0, 6.1 and 7.0. To reduce compile time you can specify +for CUDA architecture versions 5.0, 6.0, 6.1, 7.0 and 7.5. To reduce compile time you can specify the architectures to build CUTLASS for by changing the CMake configuration setting `CUTLASS_NVCC_ARCHS`. @@ -107,13 +120,12 @@ $ ./tools/test/unit/cutlass_unit_test ... ... [----------] Global test environment tear-down -[==========] 481 tests from 24 test cases ran. (5954 ms total) -[ PASSED ] 481 tests. +[==========] 946 tests from 57 test cases ran. (10812 ms total) +[ PASSED ] 946 tests. ``` All tests should pass, though the exact number of tests may vary over time. - # Project Structure CUTLASS is arranged as a header-only library with several example test programs @@ -128,28 +140,41 @@ templates in the cutlass/gemm directory. ``` cutlass/ - gemm/ - util/ - + gemm/ + util/ + ``` Several tools and test programs are also distributed with the CUTLASS library. They are contained in the following directories. ``` +examples/ + 00_basic_gemm/ + 01_tensor_view/ + 02_cutlass_utilities/ + 03_batched_gemm/ + 04_tile_iterator/ + 05_wmma_gemm/ tools/ - test/ - unit/ - core/ - gemm/ - perf/ - util/ - + test/ + unit/ + core/ + gemm/ + perf/ + util/ + reference/ + device/ + host/ + ``` The `test/unit/` directory consist of unit tests implemented with Google Test that demonstrate basic usage of Core API components and complete tests of the CUTLASS GEMM computations. 
+The `tools/util` directory contains CUTLASS utilities including reference implementations of GEMM and +several element-wise tensor operations. + # Performance Profiling The `test/perf/` directory contains a command-line utility for launching each of the GEMM kernels. diff --git a/clang-format.sh b/clang-format.sh deleted file mode 100755 index b2570d914..000000000 --- a/clang-format.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -e - -function formatFiles { - for f in `find "$1" -type f -name "*.$2"` ; do - COMMAND="clang-format -i $f" - echo $COMMAND - $COMMAND - done -} - -formatFiles "cutlass" "h" -formatFiles "tools/test" "h" -formatFiles "tools/test" "cpp" -formatFiles "tools/util" "h" - diff --git a/cutlass/convert.h b/cutlass/convert.h index 933d68a82..b4d0f8edd 100644 --- a/cutlass/convert.h +++ b/cutlass/convert.h @@ -28,7 +28,7 @@ */ #pragma once -#include +#include "cutlass/fragment.h" namespace cutlass { diff --git a/cutlass/coord.h b/cutlass/coord.h index 431c9bf1a..625a22723 100644 --- a/cutlass/coord.h +++ b/cutlass/coord.h @@ -28,7 +28,8 @@ #pragma once -#include +#include "cutlass/cutlass.h" +#include "cutlass/util/platform.h" namespace cutlass { @@ -44,20 +45,27 @@ struct Identity { //////////////////////////////////////////////////////////////////////////////////////////////////// /// Statically-sized array specifying Coords within a tensor -template +template struct Coord { // // Type and constant definitions // - static int const N = N_; + /// Number of elements in Coord + static int const kRank = Rank_; + + /// Number of elements in Coord, aliased for compatibility + static int const N = Rank_; + + /// Index type used to store elements + typedef Index_ Index; // // Data members // /// Indices - int idx[N]; + Index idx[kRank]; // // Methods @@ -65,25 +73,72 @@ struct Coord { /// Default ctor initializes uniformly CUTLASS_HOST_DEVICE - Coord(int value = 0) { - for (int i = 0; i < N; ++i) { + Coord(Index value = 0) { + for (int i = 0; i < 
 kRank; ++i) { idx[i] = value; } } /// Constructs from an array of integers CUTLASS_HOST_DEVICE - Coord(int _idx[]) { - for (int i = 0; i < N; ++i) { + Coord(Index _idx[]) { + for (int i = 0; i < kRank; ++i) { idx[i] = _idx[i]; } } + /// Copy constructor + CUTLASS_HOST_DEVICE + Coord(Coord const &coord) { + for (int i = 0; i < kRank; ++i) { + idx[i] = coord[i]; + } + } + + /// Returns a slice of the Coord which may be larger or smaller in rank + /// than this. + template + CUTLASS_HOST_DEVICE + Coord slice(int start = 0, Index identity = 0) const { + Coord result; + for (int i = 0; i < Slice; ++i) { + if (i + start < kRank) { + result[i] = idx[i + start]; + } + else { + result[i] = identity; + } + } + return result; + } + + /// Returns true if Coord is non-zero. + CUTLASS_HOST_DEVICE + operator bool() const { + for (int i = 0; i < kRank; ++i) { + if (idx[i]) { + return true; + } + } + return false; + } + + /// Returns true if Coord is uniformly zero. + CUTLASS_HOST_DEVICE + bool operator!() const { + for (int i = 0; i < kRank; ++i) { + if (idx[i]) { + return false; + } + } + return true; + } + /// Element-wise addition CUTLASS_HOST_DEVICE Coord operator+(Coord const& b) const { Coord c; - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { c.idx[i] = idx[i] + b.idx[i]; } return c; @@ -93,7 +148,7 @@ struct Coord { CUTLASS_HOST_DEVICE Coord operator-(Coord const& b) const { Coord c; - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { c.idx[i] = idx[i] - b.idx[i]; } return c; @@ -103,7 +158,7 @@ CUTLASS_HOST_DEVICE Coord operator*(Coord const& b) const { Coord c; - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { c.idx[i] = idx[i] * b.idx[i]; } return c; @@ -113,7 +168,7 @@ CUTLASS_HOST_DEVICE Coord operator/(Coord const& b) const { Coord c; - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { c.idx[i] = idx[i] / b.idx[i]; } return c; @@ -122,7 +177,7 @@ 
struct Coord { /// In-place addition CUTLASS_HOST_DEVICE Coord& operator+=(Coord const& b) { - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { idx[i] += b.idx[i]; } return *this; @@ -131,7 +186,7 @@ struct Coord { /// In-place subtraction CUTLASS_HOST_DEVICE Coord& operator-=(Coord const& b) { - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { idx[i] -= b.idx[i]; } return *this; @@ -140,7 +195,7 @@ struct Coord { /// In-place multiplication CUTLASS_HOST_DEVICE Coord& operator*=(Coord const& b) { - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { idx[i] *= b.idx[i]; } return *this; @@ -149,22 +204,22 @@ struct Coord { /// In-place division CUTLASS_HOST_DEVICE Coord& operator/=(Coord const& b) { - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { idx[i] /= b.idx[i]; } return *this; } /// Member access operator - CUTLASS_HOST_DEVICE int& operator[](int dim) { return idx[dim]; } + CUTLASS_HOST_DEVICE Index& operator[](int dim) { return idx[dim]; } /// Member access operator - CUTLASS_HOST_DEVICE int const& operator[](int dim) const { return idx[dim]; } + CUTLASS_HOST_DEVICE Index const& operator[](int dim) const { return idx[dim]; } /// Computes the dot product of two Coord instances template CUTLASS_HOST_DEVICE T dot(Coord const& b, T sum) const { - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { sum += idx[i] * b.idx[i]; } return sum; @@ -174,7 +229,7 @@ struct Coord { template CUTLASS_HOST_DEVICE T dot(Coord const& b) const { T sum = T(0); - for (int i = 0; i < N; ++i) { + for (int i = 0; i < kRank; ++i) { sum += idx[i] * b.idx[i]; } return sum; @@ -182,29 +237,29 @@ struct Coord { /// Gets the index of a given Coord element template - CUTLASS_HOST_DEVICE int& at() { + CUTLASS_HOST_DEVICE Index& at() { return idx[Dim]; } /// Access via index; may limit unrolling potential CUTLASS_HOST_DEVICE - int& at(int dim) { return idx[dim]; } + Index& at(int dim) { return idx[dim]; } /// Gets 
the index of a given Coord element template - CUTLASS_HOST_DEVICE int const& at() const { + CUTLASS_HOST_DEVICE Index const& at() const { return idx[Dim]; } /// Access via index; may limit unrolling potential CUTLASS_HOST_DEVICE - int const& at(int dim) const { return idx[dim]; } + Index const& at(int dim) const { return idx[dim]; } /// Determines if two Coord<> objects are equal CUTLASS_HOST_DEVICE - bool operator==(Coord const& b) const { + bool operator==(Coord const& b) const { bool equal = true; - for (int i = 0; equal && i < N; ++i) { + for (int i = 0; equal && i < kRank; ++i) { equal = (idx[i] == b.idx[i]); } return equal; @@ -212,12 +267,12 @@ struct Coord { /// Not equal CUTLASS_HOST_DEVICE - bool operator!=(Coord const& b) const { return !(*this == b); } + bool operator!=(Coord const& b) const { return !(*this == b); } /// Clamps a coordinate to a range specified by maximum and minimum values CUTLASS_HOST_DEVICE - Coord& clamp(Coord const& max, Coord const& min = Coord()) { - for (int i = 0; i < N; ++i) { + Coord& clamp(Coord const& max, Coord const& min = Coord()) { + for (int i = 0; i < kRank; ++i) { idx[i] = __NV_STD_MAX(__NV_STD_MIN(idx[i], max.idx[i]), min.idx[i]); } return *this; @@ -225,13 +280,35 @@ struct Coord { /// Returns the product of all elements CUTLASS_HOST_DEVICE - int count() const { - int product = idx[0]; - for (int i = 1; i < N; ++i) { + Index count() const { + Index product = idx[0]; + for (int i = 1; i < kRank; ++i) { product *= idx[i]; } return product; } + + /// Less than operator + CUTLASS_HOST_DEVICE + bool operator<(Coord const &b) const { + for (int i = 0; i < kRank; ++i) { + if (!(idx[i] < b[i])) { + return false; + } + } + return true; + } + + /// Less than or equals operator + CUTLASS_HOST_DEVICE + bool operator<=(Coord const &b) const { + for (int i = 0; i < kRank; ++i) { + if (!(idx[i] <= b[i])) { + return false; + } + } + return true; + } }; 
//////////////////////////////////////////////////////////////////////////////////////////////////// @@ -266,21 +343,10 @@ Coord<4> make_Coord(int _0, int _1, int _2, int _3) { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Getter -CUTLASS_HOST_DEVICE -Coord<2> get_Coord_hw(Coord<3> const& coord) { return make_Coord(coord[1], coord[2]); } - -/// Getter -CUTLASS_HOST_DEVICE -Coord<2> get_Coord_hw(Coord<4> const& coord) { return make_Coord(coord[1], coord[2]); } - -/// Getter -CUTLASS_HOST_DEVICE -Coord<3> get_Coord_hwc(Coord<4> const& coord) { return make_Coord(coord[1], coord[2], coord[3]); } - -/// Getter -CUTLASS_HOST_DEVICE -Coord<3> get_Coord_dhw(Coord<4> const& coord) { return make_Coord(coord[0], coord[1], coord[2]); } +template +CUTLASS_HOST_DEVICE Coord<3> make_Coord_from_shape() { + return make_Coord(Shape_::kD, Shape_::kH, Shape_::kW); +} //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/core_io.h b/cutlass/core_io.h index cceea4c06..849a7613f 100644 --- a/cutlass/core_io.h +++ b/cutlass/core_io.h @@ -22,8 +22,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#pragma once - /*! \file \brief Helpers for printing cutlass/core objects */ @@ -33,12 +31,96 @@ #include #include -#include +#include "cutlass/coord.h" +#include "cutlass/vector.h" + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// template -std::ostream& operator<<(std::ostream& out, cutlass::Coord const& coord) { +std::ostream& operator<<(std::ostream& out, Coord const& coord) { for (int i = 0; i < Rank; ++i) { out << (i ? 
", " : "") << coord.idx[i]; } return out; } + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to enable formatted printing of CUTLASS scalar types to an ostream +template +struct ScalarIO { + + /// Value to print + T value; + + /// Default ctor + ScalarIO() { } + + /// Constructs from a value + ScalarIO(T value): value(value) {} +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Default printing to ostream +template +inline std::ostream &operator<<(std::ostream &out, ScalarIO const &scalar) { + return out << scalar.value; +} + +/// Printing to ostream of int8_t as integer rather than character +template <> +inline std::ostream &operator<<(std::ostream &out, ScalarIO const &scalar) { + return out << int(scalar.value); +} + +/// Printing to ostream of uint8_t as integer rather than character +template <> +inline std::ostream &operator<<(std::ostream &out, ScalarIO const &scalar) { + return out << unsigned(scalar.value); +} + +/// Printing to ostream of vector of 1b elements +template <> +inline std::ostream &operator<<( + std::ostream &out, + ScalarIO > const &scalar) { + + for (int i = 0; i < 32; i++) { + out << int(scalar.value[i]); + out << ((i != 31) ? ", " : ""); + } + return out; +} + +/// Printing to ostream of vector of 4b signed integer elements +template <> +inline std::ostream &operator<<( + std::ostream &out, + ScalarIO > const &scalar) { + + for (int i = 0; i < 8; i++) { + out << int(scalar.value[i]); + out << ((i != 7) ? ", " : ""); + } + return out; +} + +/// Printing to ostream of vector of 4b unsigned integer elements +template <> +inline std::ostream &operator<<( + std::ostream &out, + ScalarIO > const &scalar) { + + for (int i = 0; i < 8; i++) { + out << unsigned(scalar.value[i]); + out << ((i != 7) ? 
", " : ""); + } + return out; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/cutlass.h b/cutlass/cutlass.h index 19600ec8f..097714c02 100644 --- a/cutlass/cutlass.h +++ b/cutlass/cutlass.h @@ -47,7 +47,9 @@ // CUTLASS_DEVICE is an error if not compiling device code #endif -// CUTLASS_PRAGMA_UNROLL inserts a CUTLASS_PRAGMA_UNROLL if supported by the compiler +#define CUTLASS_ASSERT(x) assert(x) + +// CUTLASS_PRAGMA_(UNROLL|NO_UNROLL) optimization directives for the CUDA compiler. #if defined(__CUDA_ARCH__) #if defined(_MSC_VER) #define CUTLASS_PRAGMA_UNROLL __pragma("unroll") @@ -61,7 +63,22 @@ #define CUTLASS_PRAGMA_NO_UNROLL #endif -#define CUTLASS_ASSERT(x) assert(x) +#define CUTLASS_GEMM_LOOP CUTLASS_PRAGMA_NO_UNROLL + +// A small helper class to dump a type at compile time +// Usage:: DumpType::Class +template +struct DebugType {}; + +template +void DebugTypeFunc(T const& t) { + T::t; +} + +// A small helper class to dump a compile time constant at compile time +// Usage: DumpValue::kConstant +template +struct DebugValue {}; namespace cutlass { diff --git a/cutlass/fragment.h b/cutlass/fragment.h index 886b11405..6a93d779c 100644 --- a/cutlass/fragment.h +++ b/cutlass/fragment.h @@ -29,9 +29,9 @@ #pragma once #include -#include -#include -#include +#include "cutlass/shape.h" +#include "cutlass/util/cutlass_math.h" +#include "cutlass/vector.h" namespace cutlass { @@ -72,7 +72,7 @@ provides access to element at (d, h, w, c) //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template struct StorageType { typedef uint64_t Type; }; @@ -108,9 +108,11 @@ struct Fragment : public AlignedStruct { typedef Element_ Element; /// The number of elements. static int const kElements = kElements_; + /// Alignment + static int const kAlignment = kAlignment_; /// Clear a fragment. 
- CUTLASS_DEVICE void clear() { + CUTLASS_HOST_DEVICE void clear() { // Avoid element-wise access for sub 32b element type if (kAlignment_ >= 8 && (kElements * sizeof(Element)) % 8 == 0) { uint64_t* ptr = reinterpret_cast(storage); @@ -135,14 +137,10 @@ struct Fragment : public AlignedStruct { } /// The accessor. - CUTLASS_DEVICE Element& operator[](int i) { - assert(i < kElements_); - return reinterpret_cast(storage)[i]; - } + CUTLASS_HOST_DEVICE Element& operator[](int i) { return reinterpret_cast(storage)[i]; } /// The accessor. - CUTLASS_DEVICE Element const& operator[](int i) const { - assert(i < kElements_); + CUTLASS_HOST_DEVICE Element const& operator[](int i) const { return reinterpret_cast(storage)[i]; } @@ -188,35 +186,35 @@ struct FragmentIterator { /// Ctor. template - CUTLASS_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0) + CUTLASS_HOST_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0) : pointer(reinterpret_cast(&fragment[offset])) { static_assert(OtherFragment_::kElements >= Fragment::kElements, ""); } /// The accessor. - CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { + CUTLASS_HOST_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { int const imm = ComputeOffsetFromStrides::get(d, h, w, c); return reinterpret_cast(pointer[imm]); } /// The accessor. - CUTLASS_DEVICE AccessType& at(int d, int h, int w, int c = 0) { + CUTLASS_HOST_DEVICE AccessType& at(int d, int h, int w, int c = 0) { int const imm = ComputeOffsetFromStrides::get(d, h, w, c); return reinterpret_cast(pointer[imm]); } /// The accessor. - CUTLASS_DEVICE AccessType const& operator[](int i) const { + CUTLASS_HOST_DEVICE AccessType const& operator[](int i) const { return reinterpret_cast(pointer[i * kElementsPerAccess]); } /// The accessor. 
- CUTLASS_DEVICE AccessType& operator[](int i) { + CUTLASS_HOST_DEVICE AccessType& operator[](int i) { return reinterpret_cast(pointer[i * kElementsPerAccess]); } /// Is the iterator valid? - CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const { return true; } /// The pointer. Element* pointer; @@ -246,28 +244,28 @@ struct FragmentConstIterator { /// Ctor. template - CUTLASS_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0) + CUTLASS_HOST_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0) : pointer(reinterpret_cast(&fragment[offset])) { static_assert(OtherFragment_::kElements >= Fragment::kElements, ""); } /// Create from non-constant FragmentIterator - CUTLASS_DEVICE FragmentConstIterator( + CUTLASS_HOST_DEVICE FragmentConstIterator( FragmentIterator const& rhs_) : pointer(reinterpret_cast(rhs_.offset)) {} /// The accessor. - CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { + CUTLASS_HOST_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { int const imm = ComputeOffsetFromStrides::get(d, h, w, c); return reinterpret_cast(pointer[imm]); } /// The accessor. - CUTLASS_DEVICE AccessType const& operator[](int i) const { + CUTLASS_HOST_DEVICE AccessType const& operator[](int i) const { return reinterpret_cast(pointer[i * kElementsPerAccess]); } /// Is the iterator valid? - CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const { return true; } /// The pointer. 
Element const* pointer; diff --git a/cutlass/fragment_load_store.h b/cutlass/fragment_load_store.h deleted file mode 100644 index a7d272e9e..000000000 --- a/cutlass/fragment_load_store.h +++ /dev/null @@ -1,135 +0,0 @@ -/*************************************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are permitted - * provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright notice, this list of - * conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - **************************************************************************************************/ -/*! 
\file - \brief Defines accessors for loading and storing fragments to memory efficiently. -*/ -#pragma once - -#include -#include - -namespace cutlass { - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -struct FragmentLoad {}; - -template -struct FragmentLoad { - /// The output type. - typedef FragmentElement_ AccessType; - - /// The load function. - static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) { - value.load(&pointer[offset], kStride); - } -}; - -template -struct FragmentLoad { - /// The output type. - typedef typename Vectorize::Type AccessType; - - /// The load function. - static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) { - Load::load(value, pointer, offset); - } -}; - -template -struct FragmentStore {}; - -template -struct FragmentStore { - /// The input type. - typedef FragmentElement_ AccessType; - - /// The store function. - static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) { - value.store(&pointer[offset], kStride); - } -}; - -template -struct FragmentStore { - /// The input type. - typedef typename Vectorize::Type AccessType; - - /// The store function. 
- static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) { - Store::store(value, pointer, offset); - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -} /// namespace cutlass diff --git a/cutlass/fragment_multiply_add.h b/cutlass/fragment_multiply_add.h index 36a4d6f6a..de2c8052f 100644 --- a/cutlass/fragment_multiply_add.h +++ b/cutlass/fragment_multiply_add.h @@ -27,52 +27,59 @@ */ #pragma once -#include +#include "cutlass/fragment.h" namespace cutlass { namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template < typename ScalarAlphaBeta_, + typename ScalarAccum_, + bool fragMul2 = true /*number of element per fragment is multiple of 2*/ +> struct FragmentMultiplyAdd { /// The shape of the instruction. typedef Shape<1, 1, 1, 1> InstructionShape; - /// The type for A. - typedef Scalar_ ScalarA; - /// The type for B. - typedef Scalar_ ScalarB; - /// The type for C and D. - typedef Scalar_ ScalarC; + /// The type for alpha and beta + typedef ScalarAlphaBeta_ ScalarAlphaBeta; + /// The type for accumlator + typedef ScalarAccum_ ScalarAccum; /// Ctor. CUTLASS_DEVICE FragmentMultiplyAdd() {} /// Multiply : d = a*b. template - CUTLASS_DEVICE void multiply(Scalar_ a, FragmentB_ const& b, FragmentCd_& d) { + CUTLASS_DEVICE void multiply(ScalarAlphaBeta a, FragmentB_ const& b, FragmentCd_& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 int const kReduction = FragmentB_::kElements / FragmentCd_::kElements; for (int j = 0; j < FragmentCd_::kElements; ++j) { - d[j] = a * b[j * kReduction + 0]; + d[j] = b[j * kReduction + 0]; for (int k = 1; k < kReduction; ++k) { - d[j] += a * b[j * kReduction + k]; + d[j] += b[j * kReduction + k]; } + d[j] = a * ScalarAlphaBeta(d[j]); } +#endif } /// Multiply : d = a*b + c. 
template - CUTLASS_DEVICE void multiply_add(Scalar_ a, + CUTLASS_DEVICE void multiply_add(ScalarAlphaBeta a, FragmentB_ const& b, FragmentCd_ const& c, FragmentCd_& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 int const kReduction = FragmentB_::kElements / FragmentCd_::kElements; for (int j = 0; j < FragmentCd_::kElements; ++j) { - d[j] = a * b[j * kReduction + 0] + c[j]; + d[j] = b[j * kReduction + 0]; for (int k = 1; k < kReduction; ++k) { - d[j] += a * b[j * kReduction + k]; + d[j] += b[j * kReduction + k]; } + d[j] = a * ScalarAlphaBeta(d[j]) + ScalarAlphaBeta(c[j]); } +#endif } }; @@ -80,15 +87,13 @@ struct FragmentMultiplyAdd { #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) template <> -struct FragmentMultiplyAdd { +struct FragmentMultiplyAdd { /// The shape of the instruction. - typedef Shape<1, 1, 2, 1> InstructionShape; - /// The type for A. - typedef half ScalarA; - /// The type for B. - typedef half ScalarB; - /// The type for C and D. - typedef half ScalarC; + typedef Shape<1, 1, 1, 1> InstructionShape; + /// The type for alpha and beta + typedef half ScalarAlphaBeta; + /// The type for accumlator + typedef half ScalarAccum; /// Ctor. CUTLASS_DEVICE FragmentMultiplyAdd() {} @@ -97,17 +102,19 @@ struct FragmentMultiplyAdd { template CUTLASS_DEVICE void multiply(half a, FragmentB_ const& b, FragmentCd_& d) { #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 - - // Assemble a half2 from a. - __half2 const a_half2 = __half2half2(a); // The input. __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); // The output. __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); - int const kReduction = FragmentB_::kElements / FragmentCd_::kElements; + // Assemble a half2 from a. 
+ __half2 const a_half2 = __half2half2(a); + + int const kReduction = (FragmentB_::kElements / FragmentCd_::kElements); + for (int j = 0; j < FragmentCd_::kElements / 2; ++j) { d_half2[j] = __hmul2(a_half2, b_half2[j * kReduction + 0]); + for (int k = 1; k < kReduction; ++k) { d_half2[j] = __hfma2(a_half2, b_half2[j * kReduction + k], d_half2[j]); } @@ -115,6 +122,7 @@ struct FragmentMultiplyAdd { #endif } + /// Multiply : d = a*b + c. template CUTLASS_DEVICE void multiply_add(half a, @@ -122,17 +130,19 @@ struct FragmentMultiplyAdd { FragmentCd_ const& c, FragmentCd_& d) { #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 - // Assemble a half2 from a. - __half2 const a_half2 = __half2half2(a); // The inputs. __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]); // The output. __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); + // Assemble a half2 from a. + __half2 const a_half2 = __half2half2(a); + int const kReduction = (FragmentB_::kElements / FragmentCd_::kElements); for (int j = 0; j < FragmentCd_::kElements / 2; ++j) { d_half2[j] = __hfma2(a_half2, b_half2[j * kReduction + 0], c_half2[j]); + for (int k = 1; k < kReduction; ++k) { d_half2[j] = __hfma2(a_half2, b_half2[j * kReduction + k], d_half2[j]); } diff --git a/cutlass/gemm/clear_accumulators.h b/cutlass/gemm/clear_accumulators.h index 441370f4c..3a2f33752 100644 --- a/cutlass/gemm/clear_accumulators.h +++ b/cutlass/gemm/clear_accumulators.h @@ -27,7 +27,7 @@ */ #pragma once -#include +#include "cutlass/vector.h" namespace cutlass { namespace gemm { @@ -39,11 +39,12 @@ struct ClearAccumulators { /// The shared storage. struct SharedStorage {}; - /// Ctor. - CUTLASS_DEVICE ClearAccumulators() {} /// Ctor. CUTLASS_DEVICE ClearAccumulators(SharedStorage& shared_storage) {} + /// Ctor. + CUTLASS_DEVICE ClearAccumulators() {} + /// Clear the fragment. 
template CUTLASS_DEVICE void clear(Fragment_& fragment) { diff --git a/cutlass/gemm/dgemm_traits.h b/cutlass/gemm/dgemm_traits.h index 0bbc2210b..5c0559020 100644 --- a/cutlass/gemm/dgemm_traits.h +++ b/cutlass/gemm/dgemm_traits.h @@ -27,13 +27,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/gemm_epilogue.h" +#include "cutlass/gemm/gemm_epilogue_traits.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/gemm/gemm_shared_tile.h" +#include "cutlass/gemm/gemm_traits.h" +#include "cutlass/gemm/thread_multiply_add.h" namespace cutlass { namespace gemm { @@ -41,10 +41,10 @@ namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// template < - /// The tile size for the GEMM KxNxM. + /// The tile size for threadblock-level GEMM (K-by-N-by-M). typename OutputTile_, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_, /// The number of scalars per LDG for A. int kScalarsPerLdgA_ = 1, /// The number of scalars per LDG for B. @@ -62,7 +62,7 @@ struct DgemmConfig /// The tile size for the GEMM KxNxM. OutputTile_, /// The functor to do the math in the main loop. - ThreadMultiplyAdd, double, double, double>, + ThreadMultiplyAdd, double, double, double>, /// The number of scalars per LDG for A. kScalarsPerLdgA_, /// The number of scalars per STS for A. @@ -82,7 +82,14 @@ struct DgemmConfig /// The number of scalars per LDS for D. 1, /// The number of stages in shared memory. - 2> {}; + 2, + /// kResidueSeparate + false, + /// kResidueInPrologue + false, + /// kLaunchBounds + false + >{}; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -91,12 +98,12 @@ template < MatrixLayout::Kind kLayoutA_, /// The layout for B. 
MatrixLayout::Kind kLayoutB_, - /// The output tile. + /// The tile size for threadblock-level GEMM (K-by-N-by-M) typename OutputTile_ = Shape<8, 64, 128>, /// The functor to use in the epilogue. typename EpilogueFunctor_ = LinearScaling, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_ = Shape<8, 8, 8>, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_ = Shape<8, 8, 8>, /// The number of doubles loaded in one LDG for A. int kScalarsPerLdgA_ = 1, /// The number of doubles loaded in one LDG for B. @@ -105,7 +112,7 @@ template < typename Index_ = int, /// The DGEMM config. typename GemmConfig_ = - DgemmConfig, + DgemmConfig, /// The traits class for the epilogue. typename GemmEpilogueTraits_ = SimplifiedGemmEpilogueTraits > diff --git a/cutlass/gemm/fp16_sgemm_multiply_add.h b/cutlass/gemm/fp16_sgemm_multiply_add.h new file mode 100644 index 000000000..534b8c899 --- /dev/null +++ b/cutlass/gemm/fp16_sgemm_multiply_add.h @@ -0,0 +1,83 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Template implementing matrix multiply-add operations on fragments. +*/ +#pragma once + +#include "cutlass/fragment.h" +#include "cutlass/gemm/thread_multiply_add.h" +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Template performing matrix multiply-add operation within a thread +template +struct ThreadMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 1, 1> InstructionShape; + /// The shape of a thread-leveel matrix multiply accumulate. + typedef ThreadGemmShape_ ThreadGemmShape; + /// Aliased to "AccumulatorsPerThread" for compatibility. Expect to be renamed in CUTLASS v2.0 + typedef ThreadGemmShape AccumulatorsPerThread; + /// The number of threads per warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of accumulators per warp. + typedef typename ShapeMul::Shape AccumulatorsPerWarp; + /// The type for A. specialized to half + typedef half ScalarA; + /// The fragment for A. + typedef Fragment FragmentA; + /// The type for B. 
specialized to half + typedef half ScalarB; + /// The fragment for B. + typedef Fragment FragmentB; + /// The type for C and D. specialized to float + typedef float ScalarC; + /// The accumulators. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE ThreadMultiplyAdd() {} + + /// Multiply : d = a*b + c. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < AccumulatorsPerThread::kH; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW; ++i) { + d[j * AccumulatorsPerThread::kW + i] = static_cast(a[i]) * static_cast(b[j]) + c[j * AccumulatorsPerThread::kW + i]; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/fp16_sgemm_traits.h b/cutlass/gemm/fp16_sgemm_traits.h new file mode 100644 index 000000000..361186455 --- /dev/null +++ b/cutlass/gemm/fp16_sgemm_traits.h @@ -0,0 +1,152 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defies structural properties of single-precision GEMM where any number of the input/output + could be fp16 or fp32. The accumulator type stays in fp32 +*/ +#pragma once + +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/gemm_epilogue.h" +#include "cutlass/gemm/gemm_epilogue_traits.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/gemm/gemm_shared_tile.h" +#include "cutlass/gemm/gemm_traits.h" +#include "cutlass/gemm/fp16_sgemm_multiply_add.h" + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_, + /// The type for A + typename ScalarA_, + /// The type for B + typename ScalarB_, + /// The type for C + typename ScalarC_, + /// The type for D + typename ScalarD_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of scalars per LDG for B. 
+ int kScalarsPerLdgB_ = 1> +struct Fp16SgemmConfig : public GemmConfig< + /// The scalar type for A. + ScalarA_, + /// The scalar type for B. + ScalarB_, + /// The scalar type for C. + ScalarC_, + /// The scalar type for D. + ScalarD_, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, ScalarA_, ScalarB_, float /*for sgemm accum is float*/>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 4, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 4, + /// The number of scalars per LDG for C and STG for D. + 1, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<8, 128, 128>, + /// The type for A + typename ScalarA_ = half, + /// The type for B + typename ScalarB_ = half, + /// The type for C + typename ScalarC_ = half, + /// The type for D + typename ScalarD_ = half, + /// the Type for alpha and beta, + typename Scalar_ = half, + /// The functor to use in the epilogue. + typename EpilogueFunctor_ = LinearScaling >, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_ = Shape<8, 8, 8>, + /// The number of floats loaded in one LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of floats loaded in one LDG for B. + int kScalarsPerLdgB_ = 1, + /// The index. + typename Index_ = int, + /// The SGEMM config. 
+ typename GemmConfig_ = + Fp16SgemmConfig, + /// The traits class for the epilogue. + typename GemmEpilogueTraits_ = + SimplifiedGemmEpilogueTraits > +struct Fp16SgemmSgemmTraits : public SimplifiedGemmTraits< + // The layout for A. + kLayoutA_, + // The layout for B. + kLayoutB_, + // The config. + GemmConfig_, + // The epilogue. + GemmEpilogue, + // The index. + Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm.h b/cutlass/gemm/gemm.h index c50a3f04b..6340ab4f3 100644 --- a/cutlass/gemm/gemm.h +++ b/cutlass/gemm/gemm.h @@ -31,16 +31,17 @@ #include #endif -#include -#include - +#include "cutlass/coord.h" +#include "cutlass/util/platform.h" namespace cutlass { namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// +/// GEMM kernel with launch bounds specified template -__global__ /*__launch_bounds__(Gemm_::kThreads)*/ void gemm_kernel(typename Gemm_::Params params) { +__global__ __launch_bounds__(Gemm_::kThreads) +void gemm_kernel(typename Gemm_::Params params) { // Declare shared memory. __shared__ typename Gemm_::SharedStorage shared_storage; @@ -52,28 +53,37 @@ __global__ /*__launch_bounds__(Gemm_::kThreads)*/ void gemm_kernel(typename Gemm //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct GemmDesc { - /// The dimensions of the GEMM. - Index_ m, n, k; - /// The alpha/beta scaling values. - Scalar_ alpha, beta; - /// The source matrix A. - void const* d_a; - /// The stride for A. - Index_ lda; - /// The source matrix B. - void const* d_b; - /// The stride for B. - Index_ ldb; - /// The source matrix C. - void const* d_c; - /// The stride for C. - Index_ ldc; - /// The destination matrix D. - void* d_d; - /// The stride for D. 
- Index_ ldd; +/// GEMM kernel without launch bounds specified +template +__global__ /* __launch_bounds__(Gemm_::kThreads) */ +void gemm_kernel_nolb(typename Gemm_::Params params) { + // Declare shared memory. + __shared__ typename Gemm_::SharedStorage shared_storage; + + // Construct the GEMM object. + Gemm_ gemm(params, shared_storage); + // Run GEMM. + gemm.multiply_add(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Partial specialization for launching the GEMM kernel with or without launch bounds +template +struct Launch { + Launch(typename Gemm::Params params, dim3 grid, dim3 block, cudaStream_t stream = 0) { + gemm_kernel<<< grid, block, 0, stream >>>(params); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Partial specialization for launching the GEMM kernel with or without launch bounds +template +struct Launch { + Launch(typename Gemm::Params params, dim3 grid, dim3 block, cudaStream_t stream = 0) { + gemm_kernel_nolb<<< grid, block, 0, stream >>>(params); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -100,86 +110,52 @@ struct Gemm { /// The index. typedef typename Traits::Index Index; + /// Define the mainloop iteration size + typedef typename Traits::MultiplyAdd MultiplyAdd; + /// The number of threads. static int const kThreads = Traits::GemmConfig::kThreads; - /// The params. 
- struct Params : public Traits::Params { - CUTLASS_HOST_DEVICE int initialize(Index m, - Index n, - Index k, - ScalarEpilogue alpha, - ScalarA const* d_a, - Index lda, - ScalarB const* d_b, - Index ldb, - ScalarEpilogue beta, - ScalarC const* d_c, - Index ldc, - ScalarD* d_d, - Index ldd) { - GemmDesc desc; - desc.m = m; - desc.n = n; - desc.k = k; - desc.alpha = alpha; - desc.beta = beta; - desc.d_a = reinterpret_cast(d_a); - desc.lda = lda; - desc.d_b = reinterpret_cast(d_b); - desc.ldb = ldb; - desc.d_c = reinterpret_cast(d_c); - desc.ldc = ldc; - desc.d_d = reinterpret_cast(d_d); - desc.ldd = ldd; - return Traits::Params::initialize(desc); - } - }; + // Number of warp-level multiply-accumulate steps executed by each warp. + static Index const kWarpGemmSteps = + Traits::GemmConfig::AccumulatorsPerWarp::kD / MultiplyAdd::InstructionShape::kD; + // Make sure we have at least 2 unrolling steps or our pipeling is not going to work. + static_assert(kWarpGemmSteps >= 2, "The pipelining assumes at least two steps"); + + /// Use the params object defined in traits + typedef typename Traits::Params Params; + +// +// Static function members +// + +/// Support for NVRTC #if !defined(__CUDACC_RTC__) /// Launch the kernel. static __host__ cudaError_t launch(Params const& params, cudaStream_t stream = cudaStreamDefault) { - // Setup the grid. - dim3 grid; - grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW; - grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH; - - // The number of threads. - dim3 block; - block.x = kThreads; // Launch the kernel. - void const* params_ = reinterpret_cast(¶ms); + Launch( + params, params.grid, params.block, stream); - return cudaLaunchKernel(reinterpret_cast(&gemm_kernel), - grid, - block, - const_cast(¶ms_), - 0, - stream); + return cudaGetLastError(); } /// Launch the kernel. 
static __host__ cudaError_t launch(CUfunction kernel, Params const& params, CUstream stream = CU_STREAM_LEGACY) { - // Setup the grid. - dim3 grid; - grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW; - grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH; - - // The number of threads. - dim3 block; - block.x = kThreads; // Launch the kernel. void* params_[] = {const_cast(reinterpret_cast(¶ms))}; - // return cudaLaunchKernel(reinterpret_cast(&gemm_kernel), grid, block, - // const_cast(¶ms_), 0, stream); CUresult result = cuLaunchKernel( - kernel, grid.x, grid.y, grid.z, block.x, block.y, block.z, 0, stream, params_, 0); + kernel, + params.grid.x, params.grid.y, params.grid.z, + params.block.x, params.block.y, params.block.z, + 0, stream, params_, 0); if (result != CUDA_SUCCESS) { return cudaErrorLaunchFailure; @@ -189,39 +165,41 @@ struct Gemm { #endif + // + // Methods + // + /// Ctor. CUTLASS_DEVICE Gemm(Params const& params_, SharedStorage& shared_storage_) : params(params_), shared_storage(shared_storage_) {} - /// Consume a single iteration of the loop. - template - CUTLASS_DEVICE void consume_tile(typename Traits::GlobalLoadStream& global_stream, - typename Traits::SharedLoadStream& shared_load_stream, - typename Traits::MultiplyAdd::Accumulators& accumulators, + /// Computes a warp-level GEMM on data held in shared memory + template + CUTLASS_DEVICE void consume_tile(typename Traits::GlobalLoadStream& global_to_shared_stream, + typename Traits::SharedStream& shared_load_stream, + typename MultiplyAdd::Accumulators& accumulators, Index outer_k) { - // If that's the last "load iteration" update the predicates. - if (!kIsLastIteration) { - global_stream.move_to_residue(outer_k); + // If residue portion and not calculating residue in prolog, update residue predicates now. 
+ if (Residue && outer_k <= Traits::OutputTile::kD) { + global_to_shared_stream.residue(outer_k); } - // Load data for the next iteration of the main loop. - if (!kIsLastIteration) { - global_stream.copy(); + // Load data for the next iteration of the main loop (unless it's the last iteration). + if (!LastIteration) { + global_to_shared_stream.copy(); } - // The unrolling steps for the main loop. - int const kUnrollingSteps = - Traits::MultiplyAdd::AccumulatorsPerWarp::kD / Traits::MultiplyAdd::InstructionShape::kD; - CUTLASS_PRAGMA_UNROLL - for (int step = 0; step < kUnrollingSteps - 1; ++step) { + for (int step = 0; step < kWarpGemmSteps - 1; ++step) { // Trigger the copy from shared memory for the next A/B values. shared_load_stream.copy(step + 1); + // Make sure the values are available for the current iteration to do the multiply-add. shared_load_stream.commit(step); + MultiplyAdd multiply_add; + // Do the math on the fragments of the current iteration. - typename Traits::MultiplyAdd multiply_add; multiply_add.multiply_add(shared_load_stream.fragment_a(step), shared_load_stream.fragment_b(step), accumulators, @@ -232,28 +210,25 @@ struct Gemm { Traits::shared_load_fence(true); // Commit the data in shared memory for A/B. - if (!kIsLastIteration) { - global_stream.commit(); + if (!LastIteration) { + global_to_shared_stream.commit(); } - // Make sure the data is in shared memory. Traits::shared_store_fence(true); - // Trigger the loads for the next iteration (if needed). - if (!kIsLastIteration) { + if (!LastIteration) { // Move to the next stage for the load (if it makes sense). shared_load_stream.inc_stage(); // Trigger the copy from shared memory for the next loop iteration. shared_load_stream.copy(0); } - // Make sure the values are available for the current iteration to do the multiply-add. - shared_load_stream.commit(kUnrollingSteps - 1); + shared_load_stream.commit(kWarpGemmSteps - 1); // Do the math on the fragments of the current iteration. 
- typename Traits::MultiplyAdd multiply_add; - multiply_add.multiply_add(shared_load_stream.fragment_a(kUnrollingSteps - 1), - shared_load_stream.fragment_b(kUnrollingSteps - 1), + MultiplyAdd multiply_add; + multiply_add.multiply_add(shared_load_stream.fragment_a(kWarpGemmSteps - 1), + shared_load_stream.fragment_b(kWarpGemmSteps - 1), accumulators, accumulators); } @@ -262,76 +237,112 @@ struct Gemm { CUTLASS_DEVICE void multiply_add() { // Swizzle the IDs of the block (to enable better cache behavior). typename Traits::BlockSwizzle block_swizzle; - dim3 block = block_swizzle.swizzle(); - - // Scale the id. - block.x *= Traits::OutputTile::kW; - block.y *= Traits::OutputTile::kH; + Coord<3> threadblock_offset = + block_swizzle.get_threadblock_offset(make_Coord_from_shape()); // We may want to use shared memory to clear the registers. typedef typename Traits::ClearAccumulators ClearAccumulators; // The streams to read A/B from global memory to shared memory. - typename Traits::GlobalLoadStream global_stream(params, shared_storage, block); + typename Traits::GlobalLoadStream global_to_shared_stream( + params.global_to_shared_stream, + shared_storage.main_loop.global_to_shared_stream, + shared_storage.main_loop.threadblock_tile.reference(), + params.problem_size.knm(), + threadblock_offset); + + // update A and B pointer offset based on batch_id and batch_stride_offset + //global_to_shared_stream.add_pointer_offset(block_swizzle.get_batch_id(), params.batch_stride_A, params.batch_stride_B); + global_to_shared_stream += make_Coord(block_swizzle.get_batch_id(), 0, 0); // Create the accumulator clear. - ClearAccumulators clear(shared_storage.main_loop.clear); + ClearAccumulators clear; - // By how much we unroll the main loop. - Index const kUnroll = static_cast(Traits::OutputTile::kD); - - // If we do not have enough steps in the main loop, trigger the residue code. - global_stream.move_to_residue(params.k); + // Deal with residue in prolog. 
+ global_to_shared_stream.move_to_residue(params.problem_size[0], Traits::OutputTile::kD); // Fetch the fragments for A and B from global memory. - global_stream.copy(); + global_to_shared_stream.copy(); // Copy the elements to shared memory (after transformation if needed). - global_stream.commit(); + global_to_shared_stream.commit(); // Make sure the data is in shared memory. Traits::shared_store_fence(false); - // Rollback to the beginning of the GEMM-K dimension. It may have no impact. - global_stream.rollback(); - - // The unrolling steps for the main loop. - int const kUnrollingSteps = - Traits::MultiplyAdd::AccumulatorsPerWarp::kD / Traits::MultiplyAdd::InstructionShape::kD; - - // Make sure we have at least 2 unrolling steps or our pipeling is not going to work. - static_assert(kUnrollingSteps >= 2, "The pipelining assumes at least two steps"); + // Rollback to the beginning of the first tile (if residue exists). + global_to_shared_stream.rollback(params.problem_size[0] % Traits::OutputTile::kD); // The stream of data from shared memory to fragments. - typename Traits::SharedLoadStream shared_load_stream(params, shared_storage); + typename Traits::SharedStream shared_load_stream( + params.shared_stream, + shared_storage.main_loop.threadblock_tile.reference()); // Trigger the copy from shared memory for the 1st stream. shared_load_stream.copy(0); // Allocate the accumulators. - typename Traits::MultiplyAdd::Accumulators accumulators; + typename MultiplyAdd::Accumulators accumulators; + // Clear the accumulators. clear.clear(accumulators); - // The loop index. - Index outer_k = params.k - kUnroll; + // Initial index + Index outer_k = params.problem_size[0] - Traits::OutputTile::kD; - // Enter the main loop and iterate. - for (; outer_k > 0; outer_k -= kUnroll) { - consume_tile(global_stream, shared_load_stream, accumulators, outer_k); - } + // Check if we are computing residue in prolog or not. + if (Traits::GemmConfig::kResidueInProlog) { - // Residual loop. 
- for (; outer_k > -kUnroll; outer_k -= kUnroll) { - consume_tile(global_stream, shared_load_stream, accumulators, outer_k); + // Execute all mainloop iterations but the last one. + + CUTLASS_GEMM_LOOP + for (; outer_k > 0; outer_k -= Traits::OutputTile::kD) { + consume_tile( + global_to_shared_stream, shared_load_stream, accumulators, outer_k); + + } + + // Don't load data for the last "residue" portion since we've already computed the residue. + CUTLASS_GEMM_LOOP + for (; outer_k > -Traits::OutputTile::kD; outer_k -= Traits::OutputTile::kD) { + consume_tile( + global_to_shared_stream, shared_load_stream, accumulators, outer_k); + + } + } else { + // When kResidueSeparate = true, execute all mainloop iterations but the last two without any + // consideration for K-residue or predicate updates. This improves the steady state of some + // kernels. + if (Traits::GemmConfig::kResidueSeparate) { + + CUTLASS_GEMM_LOOP + for (; outer_k > Traits::OutputTile::kD; outer_k -= Traits::OutputTile::kD) { + consume_tile( + global_to_shared_stream, shared_load_stream, accumulators, outer_k); + + } + } + + // Execute remaining tiles with K-residue predicate updates enabled. + + CUTLASS_GEMM_LOOP + for (; outer_k > -Traits::OutputTile::kD; outer_k -= Traits::OutputTile::kD) { + consume_tile( + global_to_shared_stream, shared_load_stream, accumulators, outer_k); + + } } // Epilogue. typedef typename Traits::Epilogue Epilogue; - Epilogue epilogue(params.epilogue, shared_storage.epilogue, params.m, params.n); - epilogue.epilogue(cutlass::make_Coord(0, block.y, block.x), accumulators); + Epilogue epilogue(params.epilogue, shared_storage.epilogue, params.problem_size.knm()); + epilogue.epilogue(accumulators, threadblock_offset, block_swizzle.get_batch_id()); } + // + // Data members + // + /// The params. Params const& params; /// The shared storage. 
diff --git a/cutlass/gemm/gemm_config.h b/cutlass/gemm/gemm_config.h new file mode 100644 index 000000000..76df0add6 --- /dev/null +++ b/cutlass/gemm/gemm_config.h @@ -0,0 +1,145 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! 
\file + \brief Defines properties of GEMM computation that impose some constraints on caller. +*/ +#pragma once + +#include "cutlass/shape.h" + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The scalar type for A. + typename ScalarA_, + /// The scalar type for B. + typename ScalarB_, + /// The scalar type for C. + typename ScalarC_, + /// The scalar type for D. + typename ScalarD_, + /// The threadblock tile size for the GEMM KxNxM. + typename OutputTile_, + /// The functor to do the math. + typename MultiplyAdd_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_, + /// The number of scalars per STS for A. + int kScalarsPerStsA_, + /// The number of scalars per LDG for A. + int kScalarsPerLdsA_, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_, + /// The number of scalars per STS for B. + int kScalarsPerStsB_, + /// The number of scalars per LDS for B. + int kScalarsPerLdsB_, + /// The number of scalars per LDG for C and STG for D. + int kScalarsPerLdgCAndStgD_, + /// The number of scalars per STS for D. + int kScalarsPerStsD_, + /// The number of scalars per LDS for D. + int kScalarsPerLdsD_, + /// The number of stages in shared memory to do single/double/triple-buffering. + int kStages_, + /// If true, residue is computed in mainloop. If false, separate loops are instantiated. + bool kResidueSeparate_ = false, + /// Is residue performed in prologue? + bool kResidueInProlog_ = false, + /// If true, kernel is launched with CUDA launch bounds specified + bool kLaunchBounds_ = true> +struct GemmConfig { + // + /// The scalar for A. + typedef ScalarA_ ScalarA; + /// The scalar for B. + typedef ScalarB_ ScalarB; + /// The scalar for C. + typedef ScalarC_ ScalarC; + /// The scalar for D. + typedef ScalarD_ ScalarD; + + /// The tile. + typedef OutputTile_ OutputTile; + /// The functor to do D = A*B + C. 
+ typedef MultiplyAdd_ MultiplyAdd; + /// The shape of the instruction. + typedef typename MultiplyAdd::InstructionShape InstructionShape; + /// The shape of warp-level GEMM + typedef typename MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp; + /// The accumulators. + typedef typename MultiplyAdd::Accumulators Accumulators; + + /// The number of warps. + typedef typename ShapeDiv::Shape Warps; + /// The default warp size (32 threads per warp). + static int const kWarpSize = cutlass::kWarpSize; + /// The numnber of threads. + static int const kThreads = ShapeCount::kCount * kWarpSize; + + /// The number of scalars per LDG/STS/LDS for A. + static int const kScalarsPerLdgA = kScalarsPerLdgA_; + static int const kScalarsPerStsA = kScalarsPerStsA_; + static int const kScalarsPerLdsA = kScalarsPerLdsA_; + + /// The number of scalars per LDG/STS/LDS for B. + static int const kScalarsPerLdgB = kScalarsPerLdgB_; + static int const kScalarsPerStsB = kScalarsPerStsB_; + static int const kScalarsPerLdsB = kScalarsPerLdsB_; + + /// The number of scalars per LDG for C. + static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_; + + /// The number of scalars per STS/LDS/STG for D. + static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_; + static int const kScalarsPerStsD = kScalarsPerStsD_; + static int const kScalarsPerLdsD = kScalarsPerLdsD_; + + /// The number of accumulators that are going to be fed from one LDS A/B. + static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD; + static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD; + + /// The number of stages in shared memory to implement double, triple, more-buffering. + static int const kStages = kStages_; + + /// If true, mainloop is instantiated twice. The first instantiation contains no predicate + // updates and is more efficient for some kernels. If false, only a single mainloop is + // instantaited. 
+ static bool const kResidueSeparate = kResidueSeparate_; + + /// If true, residue is computed in the prologue. + static bool const kResidueInProlog = kResidueInProlog_; + + /// If true, kernel is launched with launch bounds specified + static bool const kLaunchBounds = kLaunchBounds_; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_coord.h b/cutlass/gemm/gemm_coord.h new file mode 100644 index 000000000..8e36bb043 --- /dev/null +++ b/cutlass/gemm/gemm_coord.h @@ -0,0 +1,203 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief GemmCoord is a structure derived from Coord<4> that specifies a location within the + coordinate system of a GEMM problem. +*/ + +#pragma once + +#include "cutlass/cutlass.h" +#include "cutlass/coord.h" +#include "cutlass/util/platform.h" + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// GemmCoord is a structure derived from Coord<4> that specifies a location within the +/// coordinate space of a GEMM problem. 
+struct GemmCoord : public Coord<4, int> { + + /// Integer-valued index + typedef int Index; + + /// Base type is a Coord of rank=4 + typedef Coord<4, Index> Base; + + /// GEMM K dimension - inner dimension of the GEMM problem + static int const kK = 0; + + /// GEMM N dimension - columns of the output C matrix + static int const kN = 1; + + /// GEMM M dimension - rows of the output C matrix + static int const kM = 2; + + /// Batch dimension - for generalizing to larger problems + static int const kBatch = 3; + + // + // Methods + // + + /// Default ctor + CUTLASS_HOST_DEVICE + GemmCoord() { } + + /// Constructs from Coord<3> and a batch + CUTLASS_HOST_DEVICE + GemmCoord(Coord<3, Index> const &coord, Index _batch = 0): Base(make_Coord(coord[0], coord[1], coord[2], _batch)) { } + + /// Constructs from Coord<4> + CUTLASS_HOST_DEVICE + GemmCoord(Coord<4, Index> const &coord): Base(coord) { } + + /// Constructs from an array of coordinate elements + CUTLASS_HOST_DEVICE + GemmCoord(Index coord[4]): Base(coord) { } + + /// Helper to construct from a K, N, M, batch variables + CUTLASS_HOST_DEVICE + GemmCoord(Index k, Index n, Index m, Index batch = 0): Base(make_Coord(k, n, m, batch)) { } + + /// Returns the GEMM M coordinate + CUTLASS_HOST_DEVICE + Index const & m() const { return this->at(kM); } + + /// Returns reference to the GEMM M coordinate + CUTLASS_HOST_DEVICE + Index & m() { return this->at(kM); } + + /// Returns the GEMM N coordinate + CUTLASS_HOST_DEVICE + Index const & n() const { return this->at(kN); } + + /// Returns reference to the GEMM N coordinate + CUTLASS_HOST_DEVICE + Index & n() { return this->at(kN); } + + /// Returns the GEMM K coordinate + CUTLASS_HOST_DEVICE + Index const & k() const { return this->at(kK); } + + /// Returns reference to the GEMM K coordinate + CUTLASS_HOST_DEVICE + Index & k() { return this->at(kK); } + + /// Returns the GEMM batch coordinate + CUTLASS_HOST_DEVICE + Index const & batch() const { return this->at(kBatch); } + + /// 
Returns reference to the GEMM batch coordinate + CUTLASS_HOST_DEVICE + Index & batch() { return this->at(kBatch); } + + /// Obtains a Coord<3> from GemmCoord + CUTLASS_HOST_DEVICE + Coord<3> knm() const { + return make_Coord(k(), n(), m()); + } + + /// Obtains a Coord<2> from GemmCoord + CUTLASS_HOST_DEVICE + Coord<2> nm() const { + return make_Coord(n(), m()); + } + + /// Obtains a Coord<2> from GemmCoord + CUTLASS_HOST_DEVICE + Coord<2> km() const { + return make_Coord(k(), m()); + } + + /// Obtains a Coord<2> from GemmCoord + CUTLASS_HOST_DEVICE + Coord<2> kn() const { + return make_Coord(k(), n()); + } + + // + // Coord operators + // + + /// Element-wise addition + CUTLASS_HOST_DEVICE + GemmCoord operator+(Base const& b) const { + return GemmCoord(Base::operator+(b)); + } + + /// Element-wise subtraction + CUTLASS_HOST_DEVICE + GemmCoord operator-(Base const& b) const { + return GemmCoord(Base::operator-(b)); + } + + /// Element-wise multiplication + CUTLASS_HOST_DEVICE + GemmCoord operator*(Base const& b) const { + return GemmCoord(Base::operator*(b)); + } + + /// Element-wise division + CUTLASS_HOST_DEVICE + GemmCoord operator/(Base const& b) const { + return GemmCoord(Base::operator/(b)); + } + + /// In-place addition + CUTLASS_HOST_DEVICE + GemmCoord& operator+=(Base const& b) { + Base::operator+=(b); + return *this; + } + + /// In-place subtraction + CUTLASS_HOST_DEVICE + GemmCoord& operator-=(Base const& b) { + Base::operator-=(b); + return *this; + } + + /// In-place multiplication + CUTLASS_HOST_DEVICE + GemmCoord& operator*=(Base const& b) { + Base::operator*=(b); + return *this; + } + + /// In-place division + CUTLASS_HOST_DEVICE + GemmCoord& operator/=(Base const& b) { + Base::operator/=(b); + return *this; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_desc.h b/cutlass/gemm/gemm_desc.h new file mode 100644 
index 000000000..80f4b3655 --- /dev/null +++ b/cutlass/gemm/gemm_desc.h @@ -0,0 +1,205 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements a software-pipelined efficient GEMM. 
+*/ +#pragma once + +#include "cutlass/tensor_ref.h" +#include "cutlass/gemm/gemm_coord.h" + +namespace cutlass { +namespace gemm { + +/// GEMM problem description +template < + /// Source accumulator matrix type + typename AType_, + /// Destination accumulator type + typename BType_, + /// Source accumulator matrix type + typename CType_, + /// Destination accumulator type + typename DType_, + /// Scalar type for alpha and beta + typename SType_, + /// Index type for dimensions and strides + typename Index_ = int +> struct GemmDesc { + // + // Type definitions + // + + /// Index type for dimensions and strides + typedef Index_ Index; + + /// Source accumulator matrix type + typedef AType_ AType; + + /// Tensor reference to A operand + typedef TensorRef TensorRefA; + + /// Destination accumulator type + typedef BType_ BType; + + /// Tensor reference to B operand + typedef TensorRef TensorRefB; + + /// Source accumulator matrix type + typedef CType_ CType; + + /// Tensor reference to C operand + typedef TensorRef TensorRefC; + + /// Destination accumulator type + typedef DType_ DType; + + /// Tensor reference to D operand + typedef TensorRef TensorRefD; + + /// Scalar type for alpha and beta + typedef SType_ SType; + + // + // Data members + // + + /// The dimensions of the GEMM. + GemmCoord problem_size; + + /// The alpha scaling values. + SType alpha; + + /// The source matrix A. + TensorRefA A; + + /// batch stride for A operand + long long batch_stride_A; + + /// The source matrix B. + TensorRefB B; + + /// batch stride for B operand + long long batch_stride_B; + + /// The beta scaling values. + SType beta; + + /// The source matrix C. + TensorRefC C; + + /// batch stride for C operand + long long batch_stride_C; + + /// The destination matrix D. 
+ TensorRefD D; + + /// batch stride for D operand + long long batch_stride_D; + + // + // Methods + // + + /// Default ctor + CUTLASS_HOST_DEVICE + GemmDesc(): problem_size(0, 0, 0, 1), alpha(1), beta(0) {} + + /// Constructor for basic GEMM with batch count = 1 + CUTLASS_HOST_DEVICE + GemmDesc(Coord<3> _problem_size, + SType _alpha, + TensorRefA const &_A, + TensorRefB const &_B, + SType _beta, + TensorRefC const &_C, + TensorRefD const &_D + ): + problem_size(_problem_size[0], _problem_size[1], _problem_size[2], 1), + alpha(_alpha), + A(_A), + batch_stride_A(0), + B(_B), + batch_stride_B(0), + beta(_beta), + C(_C), + batch_stride_C(0), + D(_D), + batch_stride_D(0) {} + + /// Constructor for basic GEMM with batch count = 1 + CUTLASS_HOST_DEVICE + GemmDesc(GemmCoord _problem_size, + SType _alpha, + TensorRefA const &_A, + TensorRefB const &_B, + SType _beta, + TensorRefC const &_C, + TensorRefD const &_D + ): + problem_size(_problem_size.k(), _problem_size.n(), _problem_size.m(), 1), + alpha(_alpha), + A(_A), + batch_stride_A(0), + B(_B), + batch_stride_B(0), + beta(_beta), + C(_C), + batch_stride_C(0), + D(_D), + batch_stride_D(0) { + + assert(_problem_size.batch() == 1); + } + + /// Constructor for strided batch GEMM GEMM + CUTLASS_HOST_DEVICE + GemmDesc(GemmCoord _problem_size, + SType _alpha, + TensorRefA const &_A, + long long _batch_stride_A, + TensorRefB const &_B, + long long _batch_stride_B, + SType _beta, + TensorRefC const &_C, + long long _batch_stride_C, + TensorRefD const &_D, + long long _batch_stride_D + ): + problem_size(_problem_size), + alpha(_alpha), + A(_A), + batch_stride_A(_batch_stride_A), + B(_B), + batch_stride_B(_batch_stride_B), + beta(_beta), + C(_C), + batch_stride_C(_batch_stride_C), + D(_D), + batch_stride_D(_batch_stride_D) {} +}; + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_epilogue.h b/cutlass/gemm/gemm_epilogue.h index bc2530777..d9469bb55 100644 --- a/cutlass/gemm/gemm_epilogue.h +++ 
b/cutlass/gemm/gemm_epilogue.h @@ -29,26 +29,15 @@ */ #pragma once -#include -#include -#include +#include "cutlass/convert.h" +#include "cutlass/coord.h" +#include "cutlass/fragment.h" namespace cutlass { namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -CUTLASS_DEVICE bool is_zero(T x) { - return x == T(0); -} - -#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) -CUTLASS_DEVICE bool is_zero(half x) { return reinterpret_cast(x) == int16_t(0); } -#endif - -//////////////////////////////////////////////////////////////////////////////////////////////////// - template struct GemmEpilogue { /// The traits class. @@ -85,9 +74,7 @@ struct GemmEpilogue { /// The shared store transformer for D. typedef typename Traits::SharedStoreTransformerD SharedStoreTransformerD; /// The iterator to load D in shared memory. - typedef typename Traits::SharedLoadIteratorD SharedLoadIteratorD; - /// The shared load transformer for D. - typedef Copy SharedLoadTransformerD; + typedef typename Traits::SharedLoadStreamD SharedLoadStreamD; /// The index. typedef typename Traits::Index Index; @@ -100,33 +87,28 @@ struct GemmEpilogue { /// Ctor. CUTLASS_DEVICE GemmEpilogue(Params const& params_, SharedStorage& shared_storage_, - Index m_, - Index n_) - : params(params_), shared_storage(shared_storage_), m(m_), n(n_) {} + Coord<3> const& _problem_size) + : params(params_), shared_storage(shared_storage_), problem_size(_problem_size), functor(params_.functor) {} /// Execute the epilogue. 
- CUTLASS_DEVICE void epilogue(Coord<3> const& block, Accumulators& accumulators) { - if (is_zero(params.functor.beta)) { - epilogue_with_or_without_beta(block, accumulators); + CUTLASS_DEVICE void epilogue(Accumulators& accumulators, + Coord<3> const& block = make_Coord(0, 0, 0), + int batch_id = 0) { + if (functor.source_required()) { + epilogue_with_or_without_beta(accumulators, block, batch_id); } else { - epilogue_with_or_without_beta(block, accumulators); + epilogue_with_or_without_beta(accumulators, block, batch_id); } } - template - CUTLASS_DEVICE void epilogue_with_or_without_beta(Coord<3> const& block, - Accumulators& accumulators) { - - // The problem size. - Coord<3> const bounds = cutlass::make_Coord(0, n, m); - - // The functor. - Functor functor(params.functor); + template + CUTLASS_DEVICE void epilogue_with_or_without_beta(Accumulators& accumulators, + Coord<3> const& block, + int batch_id) { // The C fragment. typename GlobalLoadIteratorC::Fragment fragment_c; // The transformed C fragment. typename GlobalTransformerC::OutputFragment transformed_c; - CUTLASS_PRAGMA_UNROLL for (int h = 0; h < Iterations::kH; ++h) { // Compute pointer and predicate offsets for C and D global iterators. @@ -136,6 +118,7 @@ struct GemmEpilogue { Iterations::kW + params.stride_h) * h; + int const predicate_offset = ((params.iterator_d.predicate_inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) + params.iterator_d.predicate_inc_advance) * @@ -145,32 +128,40 @@ struct GemmEpilogue { // The iterator to load the elements of the C matrix. GlobalLoadIteratorC global_load_iterator( - params.iterator_c, bounds, block, pointer_offset, predicate_offset); + params.iterator_c, problem_size, block, pointer_offset, predicate_offset); + + // update C pointer offset based on batch_id and batch_stride_offset + //global_load_iterator.add_pointer_offset(batch_id * params.batch_stride_offset_c); + global_load_iterator += make_Coord(batch_id, 0, 0); + // The transformer for C. 
GlobalTransformerC transformer_c; // The transformer for D. GlobalTransformerD transformer_d; // The iterator to store into the D matrix. GlobalStoreIteratorD global_store_iterator( - params.iterator_d, bounds, block, pointer_offset, predicate_offset); + params.iterator_d, problem_size, block, pointer_offset, predicate_offset); + + // update D pointer offset based on batch_id and batch_stride_offset + //global_store_iterator.add_pointer_offset(batch_id * params.batch_stride_offset_d); + global_store_iterator += make_Coord(batch_id, 0, 0); - // The transformer to transform before storing to shared memory. SharedStoreTransformerD shared_store_transformer; typename SharedStoreTransformerD::OutputFragment shared_store_transformed_d; - // The iterator to store to shared memory. - SharedStoreIteratorD shared_store_iterator(params.shared_store_iterator_d, - shared_storage.shared_stream.store); + SharedStoreIteratorD shared_store_iterator( + params.shared_store_iterator_d, + reinterpret_cast(shared_storage.data())); - // The iterator to load from shared memory. TODO: Use a stream. - SharedLoadIteratorD shared_load_iterator(params.shared_load_iterator_d, - shared_storage.shared_stream.load); + SharedLoadStreamD shared_load_stream( + params.shared_load_stream_d, + reinterpret_cast(shared_storage.data())); CUTLASS_PRAGMA_UNROLL for (int w = 0; w < Iterations::kW; ++w) { // Load the C matrix into fragment. - if (!kBetaIsZero_) { - iterator_load(global_load_iterator, fragment_c); + if (kSourceRequired) { + global_load_iterator.load_post_increment(fragment_c); } // Make sure we can write to shared memory. 
@@ -180,33 +171,33 @@ struct GemmEpilogue { int const offset = (h * Iterations::kW + w) * SharedStoreIteratorD::Fragment::kElements; shared_store_transformer.transform(accumulators, offset, shared_store_transformed_d); - shared_iterator_store(shared_store_iterator, shared_store_transformed_d); + shared_store_iterator.store_post_increment(shared_store_transformed_d); // Make sure the data is in shared memory. shared_store_fence(); // Copy the accumulators back to registers from shared memory. - typename SharedLoadIteratorD::Fragment fetched_d; - shared_iterator_load(shared_load_iterator, fetched_d); + shared_load_stream.copy(); + shared_load_stream.commit(); // Do the math. typename GlobalTransformerD::InputFragment fragment_d; - if (kBetaIsZero_) { - functor.evaluate(fetched_d, fragment_d); - } else { + if (kSourceRequired) { // Transform C fragment. transformer_c.transform(fragment_c, transformed_c); // Do the math. - functor.evaluate(fetched_d, transformed_c, fragment_d); + functor.evaluate(shared_load_stream.fragment(), transformed_c, fragment_d); + } else { + functor.evaluate(shared_load_stream.fragment(), fragment_d); } // Transform D fragment. - typename GlobalTransformerD::OutputFragment transformed_d; - transformer_d.transform(fragment_d, transformed_d); + typename GlobalTransformerD::OutputFragment global_transformed_d; + transformer_d.transform(fragment_d, global_transformed_d); // Copy the results to global memory. - iterator_store(global_store_iterator, transformed_d); + global_store_iterator.store_post_increment(global_transformed_d); } } } @@ -222,7 +213,9 @@ struct GemmEpilogue { /// The shared storage. SharedStorage& shared_storage; /// The dimensions of the GEMM. - Index m, n; + Coord<3> problem_size; + // The functor. 
+ Functor functor; }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/gemm/gemm_epilogue_traits.h b/cutlass/gemm/gemm_epilogue_traits.h index c06fc2502..c6aff71e1 100644 --- a/cutlass/gemm/gemm_epilogue_traits.h +++ b/cutlass/gemm/gemm_epilogue_traits.h @@ -27,13 +27,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/convert.h" +#include "cutlass/coord.h" +#include "cutlass/gemm/gemm_global_stream.h" +#include "cutlass/gemm/gemm_shared_stream.h" +#include "cutlass/gemm/linear_scaling.h" +#include "cutlass/reshape_tile.h" +#include "cutlass/tile_iterator.h" namespace cutlass { namespace gemm { @@ -57,8 +57,8 @@ template < typename SharedStoreIteratorD_, /// The shared store transformer for D. typename SharedStoreTransformerD_, - /// The iterator to load D from shared memory. - typename SharedLoadIteratorD_, + /// The stream to load D from shared memory. + typename SharedLoadStreamD_, /// The number of iterations in the epilogue. typename Iterations_, /// The iterations strides. @@ -86,8 +86,8 @@ struct GemmEpilogueTraits { typedef SharedStoreIteratorD_ SharedStoreIteratorD; /// The shared store transformer for D. typedef SharedStoreTransformerD_ SharedStoreTransformerD; - /// The iterator to store D in shared memory. - typedef SharedLoadIteratorD_ SharedLoadIteratorD; + /// The stream to store D in shared memory. + typedef SharedLoadStreamD_ SharedLoadStreamD; /// typedef typename GemmConfig::EpilogueIterations Iterations; typedef Iterations_ Iterations; /// The iterations strides. @@ -118,14 +118,15 @@ struct GemmEpilogueTraits { typename GlobalStoreIteratorD::Params iterator_d; /// The params for the D shared store iterator. typename SharedStoreIteratorD::Params shared_store_iterator_d; - /// The params for the D shared load iterator. 
- typename SharedLoadIteratorD::Params shared_load_iterator_d; + /// The params for the D shared load stream. + typename SharedLoadStreamD::Params shared_load_stream_d; /// The functor params. typename Functor::Params functor; /// Setup the params. template CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { + // The parameters for the functor. int error_code = functor.initialize(desc); if (error_code) { @@ -133,20 +134,27 @@ struct GemmEpilogueTraits { } // At the end of the H iteration, we jump over a number of columns. - this->stride_h = desc.ldd * Delta::kH; + this->stride_h = desc.D.leading_dim() * Delta::kH; // Nothing to do here. this->stride_w = 0; - // Setup the params for the global memory iterator for C. - error_code = iterator_c.initialize( - reinterpret_cast(desc.d_c), desc.ldc, desc.n, stride_w, Delta::kW); + error_code = iterator_c.initialize(desc.C.data(), + desc.batch_stride_C, + desc.C.leading_dim(), + desc.problem_size[1], + stride_w, + Delta::kW); if (error_code) { return error_code; } // Setup the params for the global memory iterator for D. - return iterator_d.initialize( - reinterpret_cast(desc.d_d), desc.ldd, desc.n, stride_w, Delta::kW); + return iterator_d.initialize(desc.D.data(), + desc.batch_stride_D, + desc.D.leading_dim(), + desc.problem_size[1], + stride_w, + Delta::kW); } }; @@ -155,13 +163,20 @@ struct GemmEpilogueTraits { // The storage for the store iterator. typename SharedStoreIteratorD::SharedStorage store; // The storage for the store iterator. - typename SharedLoadIteratorD::SharedStorage load; + typename SharedLoadStreamD::SharedStorage load; }; /// The shared memory to swizzle the data in the epilogue. struct SharedStorage { // The storage for the shared stream D. 
StreamSharedStorage shared_stream; + + // + // + // + + CUTLASS_DEVICE + ScalarD* data() { return reinterpret_cast(&shared_stream.load); } }; }; @@ -192,7 +207,10 @@ struct GemmEpilogueTraitsHelper { /// The traits class to build the iterator to store to shared memory for D. typedef GemmSharedStoreTileDTraits< // The pointer is float. - typename Functor::Scalar, + // typename Functor::Scalar, + // Functor::Scalar is alpha, beta type, in mixed precision, alpha and beta may not be the same with accumulation. + // In this case Functor::ScalarAccum is needed + typename Functor::ScalarAccum, // The output tile size. typename GemmConfig_::OutputTile, // The number of warps. @@ -221,7 +239,10 @@ struct GemmEpilogueTraitsHelper { /// The traits class to build the iterator to load from shared memory for D. typedef GemmSharedLoadTileDTraits< // The pointer is float. - typename Functor::Scalar, + // typename Functor::Scalar, + // Functor::Scalar is alpha, beta type, in mixed precision, alpha and beta may not be the same with accumulation. + // In this case Functor::ScalarAccum is needed + typename Functor::ScalarAccum, // The output tile size. typename GemmConfig_::OutputTile, // The number of warps. @@ -242,6 +263,8 @@ struct GemmEpilogueTraitsHelper { IteratorAdvance::kH, MemorySpace::kShared> SharedLoadIteratorD; + /// The stream to load D. + typedef SharedLoadStream SharedLoadStreamD; /// The traits class to build the iterator to load data from global memory for C^N. typedef GemmGlobalTileCdTraits< @@ -314,8 +337,8 @@ struct SimplifiedGemmEpilogueTraits : public GemmEpilogueTraits< typename Helper_::SharedStoreIteratorD, // The shared store transformer for D. typename Helper_::SharedStoreTransformerD, - // The iterator to load D from shared memory. - typename Helper_::SharedLoadIteratorD, + // The stream to load D from shared memory. + typename Helper_::SharedLoadStreamD, // The number of iterations. typename Helper_::Iterations, // The strides between iterations. 
diff --git a/cutlass/gemm/gemm_global_stream.h b/cutlass/gemm/gemm_global_stream.h index ec675a38f..6ea72cf30 100644 --- a/cutlass/gemm/gemm_global_stream.h +++ b/cutlass/gemm/gemm_global_stream.h @@ -29,9 +29,10 @@ */ #pragma once -#include -#include -#include +#include "cutlass/coord.h" +#include "cutlass/convert.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/tile_allocation.h" namespace cutlass { namespace gemm { @@ -39,6 +40,8 @@ namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// template < + /// Identifies multiplicand + GemmOperand::Kind Operand, /// The load iterator. typename LoadIterator_, /// The store iterator to copy to shared memory. @@ -46,7 +49,9 @@ template < /// The transformer to be applied after the data has been copied from global memory. typename Transformer_> -struct GlobalLoadStreamBase { +struct GlobalLoadStream { + /// Indicates the type of GEMM operand + static GemmOperand::Kind const kOperand = Operand; /// The load iterator. typedef LoadIterator_ LoadIterator; /// The transformer. @@ -75,6 +80,15 @@ struct GlobalLoadStreamBase { typedef typename LoadIterator::Pointer Pointer; /// The index. typedef typename LoadIterator::Index Index; + /// The tile + typedef typename LoadIterator::Tile Tile; + + /// Shared memory allocation for the tile + typedef TileAllocation + ThreadblockTileStorage; + + /// Tensor reference to threadblock tile + typedef typename ThreadblockTileStorage::TensorRef ThreadblockTileRef; /// The params. struct Params { @@ -82,56 +96,73 @@ struct GlobalLoadStreamBase { typename LoadIterator::Params load_iterator; // The store iterator. typename StoreIterator::Params store_iterator; + // Offset to residue. + Index offset_to_residue; /// Setup the params. 
- template - CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc, Pointer pointer, Index ld) { - int error_code = load_iterator.initialize(desc, pointer, ld); + CUTLASS_HOST_DEVICE int initialize(Pointer pointer, + long long batch_stride, + Index ldm, + Index _offset_to_residue) { + + offset_to_residue = _offset_to_residue; + int error_code = load_iterator.initialize(pointer, batch_stride, ldm); if (error_code) { return error_code; } - return store_iterator.initialize(); } }; - /// The amount of storage in shared memory needed to store the tile. - typedef typename StoreIterator::SharedStorage SharedStoreStorage; + /// Contains private storage in shared memory needed by the objects within this class. Note, + /// this is *NOT* the shared memory allocation for the GEMM threadblock tile. That necessarily + /// exists outside this class, as it is also needed by the warp-level shared=>RF stream. + struct SharedStorage {}; - /// The storage in shared memory needed by that stream. - union SharedStorage { - // The load iterator. - typename LoadIterator::SharedStorage load_iterator; - // The store iterator. - SharedStoreStorage store_iterator; - }; + // + // Static member functions + // + + /// Maps a coordinate in the GEMM's (K, N, M) coordinate system to global memory + CUTLASS_DEVICE static Coord<3> project_coordinate(Coord<3> const& coord, Index d_offset = 0) { + bool const kKstrided = + GemmMultiplicandTraits::kKstrided; + Coord<3> tile_coord = ProjectOperand::project(coord); + return make_Coord( + tile_coord[0] + d_offset, tile_coord[1], tile_coord[2] / LoadIterator::Tile::kC); + } /// Ctor. 
- CUTLASS_DEVICE GlobalLoadStreamBase(Params const& params, - SharedStorage& shared_storage, - Coord<3> const bounds, - Coord<3> const& block) - : load_iterator(params.load_iterator, bounds, block), + CUTLASS_DEVICE GlobalLoadStream( + Params const& _params, + SharedStorage& shared_storage, + ThreadblockTileRef const& threadblock_tile_ref, + Coord<3> const bounds, + Coord<3> const& _threadblock_offset) + : params(_params), + multiplicand_bounds(project_coordinate(bounds, 1)), + threadblock_offset(project_coordinate(_threadblock_offset)), + load_iterator(params.load_iterator, + project_coordinate(bounds, 1), /*multiplicant_bounds*/ + project_coordinate(_threadblock_offset) /*threablock_offset*/), transformer(), - store_iterator(params.store_iterator, shared_storage.store_iterator) - + store_iterator(params.store_iterator, threadblock_tile_ref.data()) { + load_iterator.initialize_predicates(multiplicand_bounds, threadblock_offset); fetched_fragment.clear(); } + /// Load the data from shared memory to the fetch fragment. - CUTLASS_DEVICE void copy() { iterator_load(load_iterator, fetched_fragment); } + CUTLASS_DEVICE void copy() { load_iterator.load_post_increment(fetched_fragment); } /// Commit the data. CUTLASS_DEVICE void commit() { transformer.transform(fetched_fragment, transformed_fragment); - iterator_store(store_iterator, transformed_fragment); + store_iterator.store_post_increment(transformed_fragment); store_iterator.inc_stage(); } - /// Move to the beginning of the residue code. That's a new code path in CUTLASS 1.0.1. - CUTLASS_DEVICE void move_to_residue(Index k) { load_iterator.move_to_residue(k); } - /// Execute the residue code. CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) { load_iterator.residue(k); @@ -140,9 +171,43 @@ struct GlobalLoadStreamBase { } } - /// Rollback to the beginning of the GEMM-k dimension. - CUTLASS_DEVICE void rollback() { load_iterator.rollback(); } + /// Move to the residue portion. 
+ CUTLASS_DEVICE void move_to_residue(Index k, Index kTileK) { + Index kResidue = k % kTileK; + if (kResidue) { + residue(kResidue); + } + load_iterator.add_pointer_offset(params.offset_to_residue * load_iterator.stride_advance()); + } + /// Rollback to the beginning of the first tile + CUTLASS_DEVICE void rollback(void) { + load_iterator.initialize_predicates(multiplicand_bounds, threadblock_offset); + + int const kBlock = kOperand == GemmOperand::kA + ? (kLayout == MatrixLayout::kColumnMajor ? Tile::kH : Tile::kW) + : (kLayout == MatrixLayout::kRowMajor ? Tile::kH : Tile::kW); + + load_iterator.add_pointer_offset(-(params.offset_to_residue + kBlock) * + load_iterator.stride_advance()); + } + + /// Adds a Coord<3> to the underlying global load iterator + CUTLASS_DEVICE GlobalLoadStream &operator+=(Coord<3> const &offset) { + load_iterator += offset; + return *this; + } + + // + // Data members + // + + /// Parameters + Params params; + /// Multiplicand bounds + Coord<3> multiplicand_bounds; + /// Threadblock offset + Coord<3> threadblock_offset; /// The iterator. LoadIterator load_iterator; /// The fragment to fetch from shared memory. @@ -155,28 +220,6 @@ struct GlobalLoadStreamBase { StoreIterator store_iterator; }; -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template < - /// The load iterator. - typename LoadIterator_, - /// The store iterator to copy to shared memory. - typename StoreIterator_, - /// The transformer to be applied after the data has been copied from global memory. - typename Transformer_ = Copy > - -struct GlobalLoadStream : public GlobalLoadStreamBase { - /// The base class. - typedef GlobalLoadStreamBase Base; - - /// Ctor. 
- CUTLASS_DEVICE GlobalLoadStream(typename Base::Params const& params, - typename Base::SharedStorage& shared_storage, - Coord<3> const& bounds, - Coord<3> const& block) - : Base(params, shared_storage, bounds, block) {} -}; - //////////////////////////////////////////////////////////////////////////////////////////////////// } // namespace gemm } // namespace cutlass diff --git a/cutlass/gemm/gemm_global_tile.h b/cutlass/gemm/gemm_global_tile.h index 1cc3b3377..a355ebea0 100644 --- a/cutlass/gemm/gemm_global_tile.h +++ b/cutlass/gemm/gemm_global_tile.h @@ -27,14 +27,14 @@ */ #pragma once -#include -#include +#include "cutlass/coord.h" +#include "cutlass/util/platform.h" -#include -#include -#include -#include -#include +#include "cutlass/gemm/gemm_operand.h" +#include "cutlass/matrix_traits.h" +#include "cutlass/predicate_vector.h" +#include "cutlass/reshape_tile.h" +#include "cutlass/tile_iterator.h" namespace cutlass { namespace gemm { @@ -80,20 +80,24 @@ struct GemmGlobalTileTraits { static int const kAccessSize = kAccessSize_; /// The memory space. static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal; - /// The tile shape - typedef typename ReshapeTile::Tile Tile; + typedef Tile_ Tile; + /// The vectorized tile shape + typedef typename ReshapeTile::Tile VectorizedTile; /// The threads shape - typedef typename ReshapeThreads::Threads Threads; + typedef typename ReshapeThreads::Threads Threads; /// The relative offset between two elements in the H/W dimension in adjacent threads. - typedef Shape<1, 1, Tile::kC> ThreadsDelta; - + typedef Shape<1, 1, VectorizedTile::kC> ThreadsDelta; /// The strides in each dimension between different loads/stores. typedef Shape<0, Threads::kH, Threads::kW * kAccessSize> Delta; + /// Strides for immediate offset computation typedef Shape<0, 0, Threads::kW * ThreadsDelta::kW, kAccessSize> ImmediateOffsetStrides; /// The number of iterations needed to load/store the tile. 
- typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kAccessSize> + typedef Shape<1, + VectorizedTile::kH / Threads::kH, + VectorizedTile::kW / Threads::kW, + VectorizedTile::kC / kAccessSize> Iterations; typedef GemmMultiplicandTraits MultiplicandTraits; @@ -165,7 +169,6 @@ struct GemmGlobalIteratorAb Index_> { /// This class. typedef GemmGlobalIteratorAb This_; /// The base class. - typedef TileLoadIterator - CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc, Scalar const* ptr, Index stride_h) { + CUTLASS_HOST_DEVICE int initialize(Scalar const* ptr, + long long stride_d, + Index stride_h) { Index inc_d = 0; Index inc_advance = 0; // Move by some columns for each iteration in the H dimension. @@ -221,99 +227,36 @@ struct GemmGlobalIteratorAb (Base::Iterations::kH - 1) * inc_h; } - // The dimensions of the tile. - int const kH = TileTraits_::Tile::kH; - int const kW = TileTraits_::Tile::kW * TileTraits_::kAccessSize; - - // Move to the residue. - Index const kBlock = kAdvance == IteratorAdvance::kH ? kH : kW; - // The jump in the gemm-k dimension. - Index const stride = kAdvance == IteratorAdvance::kH ? stride_h : 1; - - // Compute the offset to the residue and how to "come" back. - Index const kResidue = desc.k % kBlock; - if (kResidue > 0) { - move_to_residue_offset = (desc.k - kResidue) * stride; - } else { - move_to_residue_offset = (desc.k - kBlock) * stride; - } - - Base::Params::initialize(ptr, 0, stride_h, 1, inc_d, inc_h, 0, inc_advance); + Base::Params::initialize( + ptr, stride_d, stride_h, 1, inc_d, inc_h, 0, inc_advance); return 0; } - - // The extra offset to control moving to the residue. - Index move_to_residue_offset; }; - /// Ctor. - CUTLASS_DEVICE GemmGlobalIteratorAb(Params const& _params, - const Coord<3>& bounds, - const Coord<3>& block, - ThreadOffset thread_offset_func = ThreadOffset()) - : params(_params) { - thread_offset = thread_offset_func(); - // The column. 
- Index block_h = thread_offset[1]; - // The contiguous dimension. - Index block_w = thread_offset[2]; + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + /// The parameters + Params params; + /// The predicates. + PredicateVector predicates; - // Add the blocks indices. - if (kAdvance == IteratorAdvance::kH) { - block_h += block[1]; - block_w += block[2]; - - } else { - block_h += block[2]; - block_w += block[1]; - } - - // Setup the pointer. - params.pointer += (block_h * params.stride_h + block_w); - - // Initialize predicates - initialize_predicates(bounds, make_Coord(0, block_h, block_w)); - } - - /// The accessor. - CUTLASS_DEVICE void get(typename Base::AccessType& value, int d, int h, int w, int c) const { - int const imm = - ComputeOffsetFromStrides::get(0, 0, w, c); - Load::load(value, params.pointer, imm); - } - - /// Increment the pointer in the H dimension. - CUTLASS_DEVICE void inc_h() { params.pointer += params.inc_h; } - /// Increment the pointer in the D dimension. - CUTLASS_DEVICE void inc_d() { params.pointer += params.inc_d; } - /// Increment the pointer to move to the next iteration. - CUTLASS_DEVICE void inc_advance() { params.pointer += params.inc_advance; } - - /// Initialize the predicates. - CUTLASS_DEVICE void initialize_predicates(const Coord<3>& bounds, const Coord<3>& block) { + CUTLASS_HOST_DEVICE void initialize_predicates(const Coord<3>& bounds, const Coord<3>& block_offset) { // Setup the masks to control loads. predicates.fill(0); - int bounds_h, bounds_w; - if (kAdvance == IteratorAdvance::kH) { - bounds_w = bounds[2] - block[2]; - bounds_h = bounds[1]; - - } else { - bounds_w = bounds[1]; - bounds_h = bounds[2] - block[1]; - } - // Fill in the bits of the predicate vector. 
for (int d = 0; d < Base::Iterations::kD; ++d) { for (int h = 0; h < Base::Iterations::kH; ++h) { for (int w = 0; w < Base::Iterations::kW; ++w) { for (int c = 0; c < Base::Iterations::kC; ++c) { - bool flag = w * Base::Delta::kW < bounds_w; + bool flag = w * Base::Delta::kW + thread_offset[2] + block_offset[2] < bounds[2]; if (kAdvance == IteratorAdvance::kH) { - flag = flag && (h * Base::Delta::kH + d * Base::Delta::kD) < bounds_h; + flag = + flag && + (h * Base::Delta::kH + d * Base::Delta::kD) + thread_offset[1] + block_offset[1] < + bounds[1]; } else { - flag = flag && (h * Base::Delta::kH) < bounds_h; + flag = flag && (h * Base::Delta::kH) + thread_offset[1] + block_offset[1] < bounds[1]; } int const bit = ComputeOffsetFromShape::get(d, h, w, c); predicates.set(bit, flag); @@ -323,31 +266,44 @@ struct GemmGlobalIteratorAb } } - /// Move to residue portion. - CUTLASS_DEVICE void move_to_residue(Index k) { - // Store the pointer and the predicates. - stored_pointer = params.pointer; - stored_predicates = predicates; + /// Ctor. + CUTLASS_HOST_DEVICE GemmGlobalIteratorAb(Params const& _params, + const Coord<3>& bounds, + const Coord<3>& threadblock_offset, + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params) { + thread_offset = thread_offset_func(); + // Setup the pointer. + params.pointer += ((threadblock_offset[1] + thread_offset[1]) * params.stride_h + + (threadblock_offset[2] + thread_offset[2])); - // Move the pointer to the residue. - params.pointer += params.move_to_residue_offset; + } - // The dimensions of the tile. - int const kH = TileTraits_::Tile::kH; - int const kW = TileTraits_::Tile::kW * TileTraits_::kAccessSize; + /// Increment the pointer in the W dimension. + CUTLASS_HOST_DEVICE void inc_w() { Base::inc_w(); } + /// Increment the pointer in the H dimension. + CUTLASS_HOST_DEVICE void inc_h() { params.pointer += params.inc_h; } + /// Increment the pointer in the D dimension. 
+ CUTLASS_HOST_DEVICE void inc_d() { params.pointer += params.inc_d; } + /// Increment the pointer to move to the next iteration. + CUTLASS_HOST_DEVICE void inc_advance() { params.pointer += params.inc_advance; } - // The unrolling factor. - int const kUnroll = kAdvance == IteratorAdvance::kH ? kH : kW; - - // Clear the predicates for the residue. TODO: We can do something smarter. - int const kResidue = (int)(k % (Index)kUnroll); - if (kResidue > 0) { - residue(kResidue); - } + /// Loads a single fragment element from memory + CUTLASS_HOST_DEVICE void load_element( + typename Base::AccessType& value, int d, int h, int w, int c) const { + int const offset = + ComputeOffsetFromStrides::get(0, 0, w, c); + Load::load(value, params.pointer, offset); } /// That's the residue! Update the predicates. - CUTLASS_DEVICE void residue(Index k) { + CUTLASS_HOST_DEVICE void residue(Index k) { // The coordinates of the thread. Index block_h = thread_offset[1]; // The contiguous dimension. @@ -375,26 +331,63 @@ struct GemmGlobalIteratorAb } } - /// Rollback to beginning of first tile and initialize predicates. - CUTLASS_DEVICE void rollback() { - params.pointer = stored_pointer; - predicates = stored_predicates; - } - - /// Is the iterator valid? - CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + /// Is the valid? + CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const { int const bit = ComputeOffsetFromShape::get(d, h, w, c); return predicates[bit]; } - /// Offset of an individual lane from the start of the tile - Coord<4> thread_offset; - /// The parameters - Params params; - /// The pointer. - typename Base::Scalar const* stored_pointer; - /// The predicates. 
- PredicateVector predicates, stored_predicates; + /// Adds a vector offset to the iterator + CUTLASS_HOST_DEVICE GemmGlobalIteratorAb & operator+=(Coord<3> const &offset) { + + long long _offset = offset.template dot( + make_Coord(params.stride_d, params.stride_h, params.stride_w) + ); + + params.pointer += _offset; + return *this; + } + + CUTLASS_HOST_DEVICE void add_pointer_offset(Index offset) { params.pointer += offset; } + + CUTLASS_HOST_DEVICE Index stride_advance(void) { + Index stride = params.stride_h; + if (kAdvance == IteratorAdvance::kW) { + stride = params.stride_w; + } + return stride; + } + + template + CUTLASS_HOST_DEVICE void load_post_increment(Fragment& fragment) { + typename Base::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < Base::Iterations::kD; ++d) { + for (int h = 0; h < Base::Iterations::kH; ++h) { + for (int w = 0; w < Base::Iterations::kW; ++w) { + for (int c = 0; c < Base::Iterations::kC; ++c) { + if (valid(d, h, w, c)) { + load_element( + reinterpret_cast(frag_iterator.at(d, h, w, c)), + d, + h, + w, + c); + } + } + if (w < Base::Iterations::kW - 1) { + inc_w(); + } + } + if (h < Base::Iterations::kH - 1) { + inc_h(); + } + } + if (d < Base::Iterations::kD - 1) { + inc_d(); + } + } + inc_advance(); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -433,6 +426,8 @@ struct GemmGlobalIteratorCd : public TileIteratorBasepointer = pointer; + // Stride per batch + stride_d = batch_stride; // Each column of the matrix. - stride_h = TileTraits_::ThreadsDelta::kH * ld; + stride_h = TileTraits_::ThreadsDelta::kH * ldm; // Each thread output 1 column per iteration. The stride between columns is given by the // number of scalars that are loaded per LDS for B. 
- inc_h = ld * TileTraits_::kStrideH; + inc_h = ldm * TileTraits_::kStrideH; inc_advance = - (ld - ld * TileTraits_::kStrideH * (Base::Iterations::kH - 1)) + epilogue_stride_w; + (ldm - ldm * TileTraits_::kStrideH * (Base::Iterations::kH - 1)) + epilogue_stride_w; predicate_offset = bound; predicate_inc_h = TileTraits_::kStrideH; @@ -464,75 +465,173 @@ struct GemmGlobalIteratorCd : public TileIteratorBase thread_offset; + /// The predicates for the row. + cutlass::PredicateVector predicates; /// Ctor. - CUTLASS_DEVICE GemmGlobalIteratorCd() {} + CUTLASS_HOST_DEVICE GemmGlobalIteratorCd(Params const& _params, + const Coord<3>& bounds, + const Coord<3>& block_offset, + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params) { + thread_offset = thread_offset_func(); + // Prepare the vector of predicates. + for (int i = 0; i < Base::Iterations::kW; ++i) { + predicates.set(i, thread_offset[2] + i * Base::Delta::kW < bounds[2]); + } + } /// Ctor. - CUTLASS_DEVICE GemmGlobalIteratorCd(Params const& params, - const Coord<3>& bounds, - const Coord<3>& block, - int offset = 0, - int pred_offset = 0, - ThreadOffset thread_offset_func = ThreadOffset()) - : params(params) { + CUTLASS_HOST_DEVICE GemmGlobalIteratorCd(Params const& _params, + const Coord<3>& bounds, + const Coord<3>& block, + int offset = 0, + int pred_offset = 0, + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params) { thread_offset = thread_offset_func(); // Each warp works on a different column of the tile. int const h = thread_offset[1] + block[1]; // Each lane writes a different element. int const w = thread_offset[2] + block[2]; // Setup the pointer. - this->params.pointer += ((h * params.stride_h + w) + offset); + params.pointer += ((h * params.stride_h + w) + offset); // Prepare the vector of predicates. 
for (int i = 0; i < Base::Iterations::kW; ++i) { predicates.set(i, w + i * Base::Delta::kW < bounds[2]); } - this->params.predicate_offset -= (h + pred_offset); - } - - /// The accessor. - CUTLASS_DEVICE void get(typename Base::AccessType& value, int d, int h, int w, int c) const { - int const imm = - ComputeOffsetFromStrides::get(0, 0, w, c); - Load::load(value, params.pointer, imm); + params.predicate_offset -= (h + pred_offset); } /// Increment the pointer in the C dimension. - CUTLASS_DEVICE void inc_c() {} + CUTLASS_HOST_DEVICE void inc_c() {} /// Increment the pointer in the W dimension. - CUTLASS_DEVICE void inc_w() {} + CUTLASS_HOST_DEVICE void inc_w() {} /// Increment the pointer in the H dimension. - CUTLASS_DEVICE void inc_h() { + CUTLASS_HOST_DEVICE void inc_h() { params.pointer += params.inc_h; params.predicate_offset -= params.predicate_inc_h; } /// Increment the pointer in the D dimension. - CUTLASS_DEVICE void inc_d() {} + CUTLASS_HOST_DEVICE void inc_d() {} /// Increment the pointer to move to the next iteration. - CUTLASS_DEVICE void inc_advance() { + CUTLASS_HOST_DEVICE void inc_advance() { params.pointer += params.inc_advance; - this->params.predicate_offset -= params.predicate_inc_advance; + params.predicate_offset -= params.predicate_inc_advance; } - /// The accessor. - CUTLASS_DEVICE void set(typename Base::AccessType const& value, int d, int h, int w, int c) { - int const imm = - ComputeOffsetFromStrides::get(0, 0, w, c); - Store::store( - value, params.pointer, imm); + /// Adds a vector offset to the iterator + CUTLASS_HOST_DEVICE GemmGlobalIteratorCd & operator+=(Coord<3> const &offset) { + long long _offset = offset.template dot( + make_Coord(params.stride_d, params.stride_h, 1) + ); + params.pointer += _offset; + return *this; } - /// Test the validity of the iterator. - CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + /// Loads a single fragment element from memory. 
+ CUTLASS_HOST_DEVICE void load_element( + typename Base::AccessType& value, int d, int h, int w, int c) const { + int const offset = + ComputeOffsetFromStrides::get(d, h, w, c); + Load::load(value, params.pointer, offset); + } + + /// Stores a single fragment element into memory. + CUTLASS_HOST_DEVICE void store_element( + typename Base::AccessType const& value, int d, int h, int w, int c) { + int const offset = + ComputeOffsetFromStrides::get(d, h, w, c); + Store::store(value, params.pointer, offset); + } + + /// Test the validity of the + CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const { return predicates.at(w) && params.predicate_offset > 0; } - /// The predicates for the row. - cutlass::PredicateVector predicates; + /// add pointer offset + CUTLASS_HOST_DEVICE void add_pointer_offset(Index offset) { params.pointer += offset; } + + /// Loads and increments iterator + template + CUTLASS_HOST_DEVICE void load_post_increment(Fragment& fragment) { + typename Base::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < Base::Iterations::kD; ++d) { + for (int h = 0; h < Base::Iterations::kH; ++h) { + for (int w = 0; w < Base::Iterations::kW; ++w) { + for (int c = 0; c < Base::Iterations::kC; ++c) { + if (valid(d, h, w, c)) { + load_element( + reinterpret_cast(frag_iterator.at(d, h, w, c)), + d, + h, + w, + c); + } + } + if (w < Base::Iterations::kW - 1) { + inc_w(); + } + } + if (h < Base::Iterations::kH - 1) { + inc_h(); + } + } + if (d < Base::Iterations::kD - 1) { + inc_d(); + } + } + inc_advance(); + } + + template + CUTLASS_HOST_DEVICE void store_post_increment(Fragment& fragment) { + typename Base::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < Base::Iterations::kD; ++d) { + for (int h = 0; h < Base::Iterations::kH; ++h) { + for (int w = 0; w < Base::Iterations::kW; ++w) { + for (int c = 0; c < Base::Iterations::kC; ++c) { + if (valid(d, h, w, c)) { + store_element( + reinterpret_cast(frag_iterator.at(d, h, w, c)), + 
d, + h, + w, + c); + } + } + if (w < Base::Iterations::kW - 1) { + inc_w(); + } + } + if (h < Base::Iterations::kH - 1) { + inc_h(); + } + } + if (d < Base::Iterations::kD - 1) { + inc_d(); + } + } + inc_advance(); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/gemm/gemm_operand.h b/cutlass/gemm/gemm_operand.h index 737f993f0..2b4dcdc91 100644 --- a/cutlass/gemm/gemm_operand.h +++ b/cutlass/gemm/gemm_operand.h @@ -28,9 +28,9 @@ */ #pragma once -#include -#include -#include +#include "cutlass/matrix_traits.h" +#include "cutlass/reshape_tile.h" +#include "cutlass/util/platform.h" namespace cutlass { namespace gemm { diff --git a/cutlass/gemm/gemm_shared_stream.h b/cutlass/gemm/gemm_shared_stream.h index c6ff7bd97..df20bd6ca 100644 --- a/cutlass/gemm/gemm_shared_stream.h +++ b/cutlass/gemm/gemm_shared_stream.h @@ -28,7 +28,8 @@ */ #pragma once -#include +#include "cutlass/tensor_ref.h" +#include "cutlass/gemm/gemm_shared_tile.h" namespace cutlass { namespace gemm { @@ -56,6 +57,11 @@ struct SharedLoadStream { ""); /// The output fragment. typedef TransformedFragment Fragment; + /// Scalar data type + typedef typename Iterator::Scalar Scalar; + + /// Reference type to a tensor + typedef TensorRef TensorRef; /// The params. struct Params { @@ -73,29 +79,38 @@ struct SharedLoadStream { CUTLASS_DEVICE SharedLoadStream() {} /// Ctor. - CUTLASS_DEVICE SharedLoadStream(Params const ¶ms, SharedStorage &shared_storage) { - this->initialize(params, shared_storage); + CUTLASS_DEVICE SharedLoadStream(Params const ¶ms, TensorRef const &ref) { + this->initialize(params, ref); } /// Initialize the stream. - CUTLASS_DEVICE void initialize(Params const ¶ms, SharedStorage &shared_storage) { + CUTLASS_DEVICE void initialize(Params const ¶ms, TensorRef const &ref) { // The iterator. 
- iterator = Iterator(params.iterator, shared_storage); + iterator = Iterator(params.iterator, ref.data()); // The transformer. transformer = Transformer(); } /// Load the data from shared memory to the fetch fragment. - CUTLASS_DEVICE void copy(FetchedFragment &fetched) { shared_iterator_load(iterator, fetched); } + CUTLASS_DEVICE void copy() { iterator.load_post_increment(fetched[0]); } /// Load the data from shared memory to the fetch fragment. - CUTLASS_DEVICE void copy(int d, FetchedFragment &fetched) { - shared_iterator_load(iterator, fetched, d); - } + CUTLASS_DEVICE void copy(int step) { iterator.load(fetched[step % 2], step); } /// Commit the data. - CUTLASS_DEVICE void commit(FetchedFragment &fetched, TransformedFragment &transformed) { - transformer.transform(fetched, transformed); + CUTLASS_DEVICE void commit() { transformer.transform(fetched[0], transformed[0]); } + + /// Commit the data. + CUTLASS_DEVICE void commit(int step) { + transformer.transform(fetched[step % 2], transformed[step % 2]); + } + + /// Returns the fragment for the given step + CUTLASS_DEVICE TransformedFragment &fragment(int step = 0) { return transformed[step % 2]; } + + /// Returns the fragment for the given step + CUTLASS_DEVICE TransformedFragment const &fragment(int step = 0) const { + return transformed[step % 2]; } /// Increment the stage. @@ -103,8 +118,12 @@ struct SharedLoadStream { /// The iterator. Iterator iterator; + /// Fetched fragment + FetchedFragment fetched[2]; /// The transformer. 
Transformer transformer; + /// Transformed fragment + TransformedFragment transformed[2]; }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/gemm/gemm_shared_tile.h b/cutlass/gemm/gemm_shared_tile.h index 7c61e0229..78fb1f205 100644 --- a/cutlass/gemm/gemm_shared_tile.h +++ b/cutlass/gemm/gemm_shared_tile.h @@ -27,7 +27,7 @@ */ #pragma once -#include +#include "cutlass/gemm/gemm_operand.h" namespace cutlass { namespace gemm { diff --git a/cutlass/gemm/gemm_stream_pair.h b/cutlass/gemm/gemm_stream_pair.h new file mode 100644 index 000000000..0a6df15ed --- /dev/null +++ b/cutlass/gemm/gemm_stream_pair.h @@ -0,0 +1,251 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a pair of GEMM tile streams +*/ +#pragma once + +#include "cutlass/convert.h" +#include "cutlass/matrix_traits.h" +#include "cutlass/reshape_tile.h" +#include "cutlass/tile_allocation.h" +#include "cutlass/tile_iterator.h" + +#include "cutlass/gemm/clear_accumulators.h" +#include "cutlass/gemm/gemm_config.h" +#include "cutlass/gemm/gemm_global_stream.h" +#include "cutlass/gemm/gemm_operand.h" +#include "cutlass/gemm/gemm_shared_stream.h" +#include "cutlass/gemm/threadblock_swizzle.h" + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Collect the global load streams for multiplicands. 
+template +struct GlobalLoadStreamPair { + // + // Type definitions + // + + /// Stream for A multiplicand + typedef StreamA_ StreamA; + + /// Stream for B multiplicand + typedef StreamB_ StreamB; + + /// Parameters object + struct Params { + /// Parameters object for StreamA + typename StreamA::Params stream_a; + + /// Parameters object for StreamB + typename StreamB::Params stream_b; + + /// Default constructor + CUTLASS_HOST_DEVICE + Params() {} + + /// Constructs a global load stream pair Params object + CUTLASS_HOST_DEVICE + Params(typename StreamA::Params const &_params_A, typename StreamB::Params const &_params_B) + : stream_a(_params_A), stream_b(_params_B) {} + }; + + /// Assumes the A stream defines the index type + typedef typename StreamA::Index Index; + + /// Shared memory allocation for threadblock-scoped GEMM tile + typedef ZipTileAllocation + ThreadblockTileStorage; + + /// ZipTensorRef to threadblock tiles + typedef typename ThreadblockTileStorage::TensorRef ThreadblockTileRef; + + /// Defines a structure containing shared storage for each pair + struct SharedStorage { + typename StreamA::SharedStorage stream_a; + typename StreamB::SharedStorage stream_b; + }; + + // + // Data members + // + + /// Stream for A multiplicand + StreamA stream_a; + + /// Stream for B multiplicand + StreamB stream_b; + + // + // Methods + // + + /// Ctor. 
+ CUTLASS_DEVICE GlobalLoadStreamPair(Params const ¶ms, + SharedStorage &shared_storage, + ThreadblockTileRef const &threadblock_tile_ref, + Coord<3> const &bounds, + Coord<3> const &block_offset = make_Coord(0, 0, 0)) + : stream_a(params.stream_a, + shared_storage.stream_a, + threadblock_tile_ref.first, + bounds, + block_offset), + stream_b(params.stream_b, + shared_storage.stream_b, + threadblock_tile_ref.second, + bounds, + block_offset) {} + + CUTLASS_DEVICE + GlobalLoadStreamPair & operator+=(Coord<3> const offset) { + stream_a += offset; + stream_b += offset; + return *this; + } + + /// Trigger the copies from shared memory to registers. + CUTLASS_DEVICE void copy() { + stream_a.copy(); + stream_b.copy(); + } + + /// Commit the data. + CUTLASS_DEVICE void commit() { + stream_a.commit(); + stream_b.commit(); + } + + /// Execute the residue code. + CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) { + stream_a.residue(k, skip_clear); + stream_b.residue(k, skip_clear); + } + + /// Move to residue. + CUTLASS_DEVICE void move_to_residue(Index k, Index kTileK) { + if (kResidueInProlog_) { + stream_a.move_to_residue(k, kTileK); + stream_b.move_to_residue(k, kTileK); + } else if (k < kTileK) { + residue(k, true); + } + } + + /// Rollback to beginning of first tile. + CUTLASS_DEVICE void rollback(bool kRollback) { + if (kResidueInProlog_ && kRollback) { + stream_a.rollback(); + stream_b.rollback(); + } + } +}; + +/// Collect the global load streams for multiplicands. 
+template +struct SharedStreamPair { + // + // Type definitions + // + + /// Stream for A multiplicand + typedef StreamA_ StreamA; + + /// Stream for B multiplicand + typedef StreamB_ StreamB; + + /// Parameters object passed to load iterators + struct Params { + /// + typename StreamA::Params stream_a; + + /// + typename StreamB::Params stream_b; + }; + + /// Shared memory allocation for threadblock-scoped GEMM tile + typedef ZipTensorRef + ThreadblockTileRef; + + // + // Data members + // + + /// The stream for A. + StreamA stream_a; + + /// The stream for B. + StreamB stream_b; + + // + // Methods + // + + /// Construct with the composable structure + CUTLASS_DEVICE SharedStreamPair(Params const ¶ms, ThreadblockTileRef const &threadblock_tile_ref) + : stream_a(params.stream_a, threadblock_tile_ref.first), + stream_b(params.stream_b, threadblock_tile_ref.second) {} + + /// Trigger the copies from shared memory to registers. + CUTLASS_DEVICE void copy(int step) { + stream_a.copy(step); + stream_b.copy(step); + } + + /// Commit the data. + CUTLASS_DEVICE void commit(int step) { + stream_a.commit(step); + stream_b.commit(step); + } + + /// The fragment A. + CUTLASS_DEVICE + typename StreamA::TransformedFragment const &fragment_a(int step) const { + return stream_a.fragment(step); + } + + /// The fragment B. + CUTLASS_DEVICE + typename StreamB::TransformedFragment const &fragment_b(int step) const { + return stream_b.fragment(step); + } + + /// Increment the stage. 
+ CUTLASS_DEVICE void inc_stage() { + stream_a.inc_stage(); + stream_b.inc_stage(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_traits.h b/cutlass/gemm/gemm_traits.h index cb57c4d5c..fd6efb466 100644 --- a/cutlass/gemm/gemm_traits.h +++ b/cutlass/gemm/gemm_traits.h @@ -27,117 +27,27 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/convert.h" +#include "cutlass/matrix_traits.h" +#include "cutlass/reshape_tile.h" +#include "cutlass/tile_allocation.h" +#include "cutlass/tile_iterator.h" +#include "cutlass/kernel_launch.h" +#include "cutlass/gemm/clear_accumulators.h" +#include "cutlass/gemm/gemm_config.h" +#include "cutlass/gemm/gemm_desc.h" +#include "cutlass/gemm/gemm_stream_pair.h" +#include "cutlass/gemm/gemm_global_stream.h" +#include "cutlass/gemm/gemm_operand.h" +#include "cutlass/gemm/gemm_shared_stream.h" +#include "cutlass/gemm/threadblock_swizzle.h" +#include "cutlass/gemm/gemm.h" namespace cutlass { namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// -template < - /// The scalar type for A. - typename ScalarA_, - /// The scalar type for B. - typename ScalarB_, - /// The scalar type for C. - typename ScalarC_, - /// The scalar type for D. - typename ScalarD_, - /// The output tile size for the GEMM KxNxM. - typename OutputTile_, - /// The functor to do the math. - typename MultiplyAdd_, - /// The number of scalars per LDG for A. - int kScalarsPerLdgA_, - /// The number of scalars per STS for A. - int kScalarsPerStsA_, - /// The number of scalars per LDG for A. - int kScalarsPerLdsA_, - /// The number of scalars per LDG for B. - int kScalarsPerLdgB_, - /// The number of scalars per STS for B. - int kScalarsPerStsB_, - /// The number of scalars per LDS for B. 
- int kScalarsPerLdsB_, - /// The number of scalars per LDG for C and STG for D. - int kScalarsPerLdgCAndStgD_, - /// The number of scalars per STS for D. - int kScalarsPerStsD_, - /// The number of scalars per LDS for D. - int kScalarsPerLdsD_, - /// The number of stages in shared memory to do single/double/triple-buffering. - int kStages_, - /// Do we do the residue in the prologue? - bool kResidueInPrologue_ = false> - -struct GemmConfig { - // - /// The scalar for A. - typedef ScalarA_ ScalarA; - /// The scalar for B. - typedef ScalarB_ ScalarB; - /// The scalar for C. - typedef ScalarC_ ScalarC; - /// The scalar for D. - typedef ScalarD_ ScalarD; - - /// The tile. - typedef OutputTile_ OutputTile; - /// The functor to do D = A*B + C. - typedef MultiplyAdd_ MultiplyAdd; - /// The shape of the instruction. - typedef typename MultiplyAdd::InstructionShape InstructionShape; - /// The number of accumulators per warp. - typedef typename MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp; - /// The accumulators. - typedef typename MultiplyAdd::Accumulators Accumulators; - - /// The number of warps. - typedef typename ShapeDiv::Shape Warps; - /// The default warp size (32 threads per warp). - static int const kWarpSize = cutlass::kWarpSize; - /// The numnber of threads. - static int const kThreads = ShapeCount::kCount * kWarpSize; - - /// The number of scalars per LDG/STS/LDS for A. - static int const kScalarsPerLdgA = kScalarsPerLdgA_; - static int const kScalarsPerStsA = kScalarsPerStsA_; - static int const kScalarsPerLdsA = kScalarsPerLdsA_; - - /// The number of scalars per LDG/STS/LDS for B. - static int const kScalarsPerLdgB = kScalarsPerLdgB_; - static int const kScalarsPerStsB = kScalarsPerStsB_; - static int const kScalarsPerLdsB = kScalarsPerLdsB_; - - /// The number of scalars per LDG for C. - static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_; - - /// The number of scalars per STS/LDS/STG for D. 
- static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_; - static int const kScalarsPerStsD = kScalarsPerStsD_; - static int const kScalarsPerLdsD = kScalarsPerLdsD_; - - /// The number of accumulators that are going to be fed from one LDS A/B. - static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD; - static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD; - - /// The number of stages in shared memory to implement double, triple, more-buffering. - static int const kStages = kStages_; - - /// Do we do the residue in the prologue? - static bool const kResidueInPrologue = kResidueInPrologue_; -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - template struct GemmTileTraitsHelperA {}; @@ -416,60 +326,6 @@ struct GemmTileTraitsHelperB { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct GemmResidue { - /// Move to residue portion. - template - static CUTLASS_DEVICE void move_to_residue(typename GemmTraits_::GlobalLoadStreamA& stream_a, - typename GemmTraits_::GlobalLoadStreamB& stream_b, - typename GemmTraits_::Index k) { - // The new code path in CUTLASS 1.0.1: We treat the residue in the prologue so we can have - // complete main loops after that. It helps simplify the logic in the main loop. - if (kIsPrologue) { - stream_a.move_to_residue(k); - stream_b.move_to_residue(k); - } - } - - /// Rollback to beginning of first tile and initialize predicates. - static CUTLASS_DEVICE void rollback(typename GemmTraits_::GlobalLoadStreamA& stream_a, - typename GemmTraits_::GlobalLoadStreamB& stream_b) { - stream_a.rollback(); - stream_b.rollback(); - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -struct GemmResidue { - /// Move to residue portion. 
- template - static CUTLASS_DEVICE void move_to_residue(typename GemmTraits_::GlobalLoadStreamA& stream_a, - typename GemmTraits_::GlobalLoadStreamB& stream_b, - typename GemmTraits_::Index k) { - // The index. - typedef typename GemmTraits_::Index Index; - // By how much we unroll the main loop. - Index const kUnroll = static_cast(GemmTraits_::OutputTile::kD); - - // Call the residue code. That's the same path as CUTLASS 1.0.0. - if (kIsPrologue && k < kUnroll) { - stream_a.residue(k, true); - stream_b.residue(k, true); - } else if (k <= kUnroll) { - stream_a.residue(k, false); - stream_b.residue(k, false); - } - } - - /// Rollback to beginning of first tile and initialize predicates. - static CUTLASS_DEVICE void rollback(typename GemmTraits_::GlobalLoadStreamA& stream_a, - typename GemmTraits_::GlobalLoadStreamB& stream_b) {} -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - template < /// The GEMM configuration. typename GemmConfig_, @@ -488,27 +344,27 @@ template < /// The index. typename Index_ = int, /// The tool used to clear accumulators. - typename ClearAccumulators_ = ClearAccumulators > + typename ClearAccumulators_ = ClearAccumulators > struct GemmTraits { - /// This class. + /// This traits typedef GemmTraits - This_; + GlobalLoadStreamA_, + GlobalLoadStreamB_, + SharedLoadStreamA_, + SharedLoadStreamB_, + Epilogue_, + BlockSwizzle_, + Index_, + ClearAccumulators_> This_; + + /// The struct that consumes this Traits + typedef typename cutlass::gemm::Gemm KernelClass; /// The configuration. typedef GemmConfig_ GemmConfig; /// The output tile. typedef typename GemmConfig::OutputTile OutputTile; - /// Is the residue treated in the prologue? - static bool const kResidueInPrologue = GemmConfig::kResidueInPrologue; /// The stream to load A from global memory to shared memory. typedef GlobalLoadStreamA_ GlobalLoadStreamA; @@ -544,18 +400,30 @@ struct GemmTraits { /// Clear the accumulators. 
typedef ClearAccumulators_ ClearAccumulators; - /// The params. - struct Params { - /// The dimensions of the GEMM. - Index m, n, k; - /// The params for the A stream. - typename GlobalLoadStreamA::Params global_stream_a; - /// The params for the B stream. - typename GlobalLoadStreamB::Params global_stream_b; - /// The params for the A stream from shared memory. - typename SharedLoadStreamA::Params shared_stream_a; - /// The params for the B stream from shared memory. - typename SharedLoadStreamB::Params shared_stream_b; + /// Assemble the global load streams for A/B. + typedef GlobalLoadStreamPair + GlobalLoadStream; + + /// Memory needed to store the threadblock-scoped GEMM tile + typedef typename GlobalLoadStream::ThreadblockTileStorage ThreadblockTileStorage; + + /// Assemble the shared load streams for A/B. + typedef SharedStreamPair SharedStream; + + /// Parameters object constructable on the host. + struct Params : public KernelLaunchConfiguration { + + /// GEMM problem size + GemmCoord problem_size; + + /// Parameters object for the global load stream + typename GlobalLoadStream::Params global_to_shared_stream; + + /// Parameters object for the shared load stream + typename SharedStream::Params shared_stream; + /// The params for the epilogue. typename Epilogue::Params epilogue; @@ -563,21 +431,36 @@ struct GemmTraits { template CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { // Set the problem size. - this->m = desc.m; - this->n = desc.n; - this->k = desc.k; + problem_size = desc.problem_size; - // Initialize the iterator for A. - int error_code = - global_stream_a.initialize(desc, reinterpret_cast(desc.d_a), desc.lda); + // Compute grid dimensions + BlockSwizzle block_swizzle; + this->block = dim3(GemmConfig::kThreads); + this->grid = block_swizzle.get_grid_layout( + problem_size, + make_Coord_from_shape()); + // Compute offset to residue. + Index gemm_k = problem_size[0]; + Index offset_to_residue = (gemm_k % OutputTile::kD) ? 
gemm_k - (gemm_k % OutputTile::kD) : 0; + + // Initialize parameters objects for + int error_code = global_to_shared_stream.stream_a.initialize( + desc.A.data(), + desc.batch_stride_A, + desc.A.leading_dim(), + offset_to_residue + ); if (error_code) { return error_code; } - // Initialize the iterator for B. - error_code = - global_stream_b.initialize(desc, reinterpret_cast(desc.d_b), desc.ldb); + error_code = global_to_shared_stream.stream_b.initialize( + desc.B.data(), + desc.batch_stride_B, + desc.B.leading_dim(), + offset_to_residue + ); if (error_code) { return error_code; @@ -586,24 +469,81 @@ struct GemmTraits { // The epilogue. return epilogue.initialize(desc); } - }; - // The storage for A. - template - union StreamSharedStorage { - // The storage needed by the global stream. - typename GlobalLoadStream_::SharedStorage global; - // The storage needed by the shared stream. - typename SharedLoadStream_::SharedStorage shared; + /// Helper to construct a GEMM params using a BLAS-like API + CUTLASS_HOST_DEVICE int initialize(Index m, + Index n, + Index k, + typename Epilogue::Scalar alpha, + ScalarA const* d_a, + Index lda, + ScalarB const* d_b, + Index ldb, + typename Epilogue::Scalar beta, + ScalarC const* d_c, + Index ldc, + ScalarD* d_d, + Index ldd) { + GemmDesc desc( + GemmCoord(k, n, m, 1), + alpha, + TensorRef(d_a, lda), + TensorRef(d_b, ldb), + beta, + TensorRef(d_c, ldc), + TensorRef(d_d, ldd) + ); + + return this->initialize(desc); + } + + /// Helper to construct a batched GEMM params + CUTLASS_HOST_DEVICE int initialize(Index m, + Index n, + Index k, + typename Epilogue::Scalar alpha, + ScalarA const* d_a, + Index lda, + long long int batch_stride_A, + ScalarB const* d_b, + Index ldb, + long long int batch_stride_B, + typename Epilogue::Scalar beta, + ScalarC const* d_c, + Index ldc, + long long int batch_stride_C, + ScalarD* d_d, + Index ldd, + long long int batch_stride_D, + Index batch_count) { + + GemmDesc desc( + GemmCoord(k, n, m, batch_count), 
+ alpha, + TensorRef(d_a, lda), + batch_stride_A, + TensorRef(d_b, ldb), + batch_stride_B, + beta, + TensorRef(d_c, ldc), + batch_stride_C, + TensorRef(d_d, ldd), + batch_stride_D + ); + + return this->initialize(desc); + } }; // The storage for the main loop + prologue. struct MainLoopSharedStorage { - // The storage to shuffle the A matrix in shared memory. - StreamSharedStorage stream_a; - // The storage to shuffle the B matrix in shared memory. - StreamSharedStorage stream_b; - // The storage to clear the accumulators if needed. + /// Stores the threadblock tile + ThreadblockTileStorage threadblock_tile; + + /// Storage for GEMM global stream + typename GlobalLoadStream::SharedStorage global_to_shared_stream; + + /// Storage for clearing accumulators typename ClearAccumulators::SharedStorage clear; }; @@ -615,108 +555,18 @@ struct GemmTraits { typename Epilogue::SharedStorage epilogue; }; - /// Assemble the global load streams for A/B. - struct GlobalLoadStream { - /// Ctor. - CUTLASS_DEVICE GlobalLoadStream(Params const& params, - SharedStorage& shared_storage, - dim3 const& block) - : stream_a(params.global_stream_a, - shared_storage.main_loop.stream_a.global, - cutlass::make_Coord(0, params.k, params.m), - cutlass::make_Coord(0, 0, block.x)), - stream_b(params.global_stream_b, - shared_storage.main_loop.stream_b.global, - cutlass::make_Coord(0, params.k, params.n), - make_Coord(0, 0, block.y)) {} - - /// Trigger the copies from shared memory to registers. - CUTLASS_DEVICE void copy() { - stream_a.copy(); - stream_b.copy(); - } - - /// Commit the data. - CUTLASS_DEVICE void commit() { - stream_a.commit(); - stream_b.commit(); - } - - /// Move to residue portion. - template - CUTLASS_DEVICE void move_to_residue(Index k) { - GemmResidue::move_to_residue(stream_a, stream_b, k); - } - - /// Rollback to beginning of first tile and initialize predicates. - CUTLASS_DEVICE void rollback() { GemmResidue::rollback(stream_a, stream_b); } - - /// The stream for A. 
- GlobalLoadStreamA stream_a; - /// The stream for B. - GlobalLoadStreamB stream_b; - }; - - /// Assemble the shared load stream for A/B. - struct SharedLoadStream { - /// Ctor. - CUTLASS_DEVICE SharedLoadStream(Params const& params, SharedStorage& shared_storage) { - stream_a.initialize(params.shared_stream_a, shared_storage.main_loop.stream_a.shared); - stream_b.initialize(params.shared_stream_b, shared_storage.main_loop.stream_b.shared); - } - - /// Trigger the copies from shared memory to registers. - CUTLASS_DEVICE void copy(int step) { - stream_a.copy(step, fetched_a[step % 2]); - stream_b.copy(step, fetched_b[step % 2]); - } - - /// Commit the data. - CUTLASS_DEVICE void commit(int step) { - stream_a.commit(fetched_a[step % 2], transformed_a[step % 2]); - stream_b.commit(fetched_b[step % 2], transformed_b[step % 2]); - } - - /// The fragment A. - CUTLASS_DEVICE typename SharedLoadStreamA::Fragment const& fragment_a(int step) const { - return transformed_a[step % 2]; - } - - /// The fragment B. - CUTLASS_DEVICE typename SharedLoadStreamB::Fragment const& fragment_b(int step) const { - return transformed_b[step % 2]; - } - - /// Increment the stage. - CUTLASS_DEVICE void inc_stage() { - stream_a.inc_stage(); - stream_b.inc_stage(); - } - - /// The stream for A. - SharedLoadStreamA stream_a; - /// The fragments to fetch A. - typename SharedLoadStreamA::FetchedFragment fetched_a[2]; - /// The fragments to transform A. - typename SharedLoadStreamA::TransformedFragment transformed_a[2]; - /// The stream for B. - SharedLoadStreamB stream_b; - /// The fragments to fetch B. - typename SharedLoadStreamB::FetchedFragment fetched_b[2]; - /// The fragments to transform B. - typename SharedLoadStreamB::TransformedFragment transformed_b[2]; - }; - /// The memory fence for shared loads. 
static CUTLASS_DEVICE void shared_load_fence(bool in_loop) { if (SharedLoadStreamA::Iterator::kRequiresLoadFence || SharedLoadStreamB::Iterator::kRequiresLoadFence) { - __syncthreads(); + __syncthreads(); } } /// The memory fence for shared stores. - static CUTLASS_DEVICE void shared_store_fence(bool in_loop) { __syncthreads(); } + static CUTLASS_DEVICE void shared_store_fence(bool in_loop) { + __syncthreads(); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -735,7 +585,10 @@ struct SimplifiedGemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorA; /// The stream to load A from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamA; /// The global iterator to load B from global memory. @@ -750,7 +603,10 @@ struct SimplifiedGemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorB; /// The stream to load B from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamB; /// The iterator to load A from shared memory. diff --git a/cutlass/gemm/hgemm_global_tile.h b/cutlass/gemm/hgemm_global_tile.h index f14dbb311..9d5ffe850 100644 --- a/cutlass/gemm/hgemm_global_tile.h +++ b/cutlass/gemm/hgemm_global_tile.h @@ -29,10 +29,10 @@ */ #pragma once -#include -#include -#include -#include +#include "cutlass/coord.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/matrix_traits.h" +#include "cutlass/reshape_tile.h" namespace cutlass { namespace gemm { @@ -63,14 +63,14 @@ struct HgemmCrosswiseGlobalTileTraits : public GemmGlobalTileTraits< /// The threads. typedef typename Base::Threads Threads; /// The threads strides. - typedef Shape<1, 2, Base::Tile::kC> ThreadsDelta; + typedef Shape<1, 2, Base::VectorizedTile::kC> ThreadsDelta; /// The strides in each dimension between different loads/stores. typedef Shape Delta; /// The number of iterations needed to load/store the tile. 
- typedef Shape + Base::VectorizedTile::kW / Base::Threads::kW, + Base::VectorizedTile::kC / Base::kAccessSize> Iterations; /// Computes the thread offset in (H, W) based on thread ID struct ThreadOffset { diff --git a/cutlass/gemm/hgemm_multiply_add.h b/cutlass/gemm/hgemm_multiply_add.h index ebbdd06e8..7217d82c5 100644 --- a/cutlass/gemm/hgemm_multiply_add.h +++ b/cutlass/gemm/hgemm_multiply_add.h @@ -28,9 +28,9 @@ */ #pragma once -#include +#include "cutlass/fragment.h" -#include +#include "cutlass/gemm/thread_multiply_add.h" namespace cutlass { namespace gemm { @@ -38,16 +38,18 @@ namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// /// Template performing matrix multiply-add operation within a thread -template -struct ThreadMultiplyAdd { +template +struct ThreadMultiplyAdd { /// The shape of the instruction. typedef Shape<1, 1, 2, 1> InstructionShape; /// The number of accumulators per thread. - typedef AccumulatorsPerThread_ AccumulatorsPerThread; + typedef ThreadGemmShape_ ThreadGemmShape; + /// Aliased for compatibility. Will be removed for CUTLASS v2.0. + typedef ThreadGemmShape AccumulatorsPerThread; /// The number of threads per warp. typedef ThreadsPerWarp_ ThreadsPerWarp; /// The number of accumulators per warp. - typedef typename ShapeMul::Shape AccumulatorsPerWarp; + typedef typename ShapeMul::Shape AccumulatorsPerWarp; /// The type for A. typedef half ScalarA; /// The fragment for A. 
@@ -88,9 +90,9 @@ struct ThreadMultiplyAdd -#include +#include "cutlass/fragment.h" namespace cutlass { namespace gemm { diff --git a/cutlass/gemm/hgemm_traits.h b/cutlass/gemm/hgemm_traits.h index b08645bf4..2261bb4b3 100644 --- a/cutlass/gemm/hgemm_traits.h +++ b/cutlass/gemm/hgemm_traits.h @@ -27,18 +27,18 @@ */ #pragma once -#include -#include +#include "cutlass/convert.h" +#include "cutlass/reshape_tile.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/gemm_epilogue.h" +#include "cutlass/gemm/gemm_epilogue_traits.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/gemm/gemm_shared_tile.h" +#include "cutlass/gemm/gemm_traits.h" +#include "cutlass/gemm/hgemm_global_tile.h" +#include "cutlass/gemm/hgemm_multiply_add.h" +#include "cutlass/gemm/hgemm_swizzle.h" namespace cutlass { namespace gemm { @@ -48,46 +48,52 @@ namespace gemm { template < /// The tile size for the GEMM KxNxM. typename OutputTile_, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_, /// The number of scalars per LDG for A. int kScalarsPerLdgA_ = 2, /// The number of scalars per LDG for B. int kScalarsPerLdgB_ = 2> -struct HgemmConfig - : public GemmConfig< - /// The scalar type for A. - half, - /// The scalar type for B. - half, - /// The scalar type for C. - half, - /// The scalar type for D. - half, - /// The tile size for the GEMM KxNxM. - OutputTile_, - /// The functor to do the math in the main loop. - ThreadMultiplyAdd, half, half, half>, - /// The number of scalars per LDG for A. - kScalarsPerLdgA_, - /// The number of scalars per STS for A. - kScalarsPerLdgA_, - /// The number of scalars per LDS for A. - 8, - /// The number of scalars per LDG for B. - kScalarsPerLdgB_, - /// The number of scalars per STS for B. 
- kScalarsPerLdgB_, - /// The number of scalars per LDS for B. - 8, - /// The number of scalars per LDG for C and STG for D. - 2, - /// The number of scalars per STS for D. - 8, - /// The number of scalars per LDS for D. - 2, - /// The number of stages in shared memory. - 2> {}; +struct HgemmConfig : public GemmConfig< + /// The scalar type for A. + half, + /// The scalar type for B. + half, + /// The scalar type for C. + half, + /// The scalar type for D. + half, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, half, half, half>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 8, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 8, + /// The number of scalars per LDG for C and STG for D. + 2, + /// The number of scalars per STS for D. + 8, + /// The number of scalars per LDS for D. + 2, + /// The number of stages in shared memory. + 2, + /// kResidueSeparate + false, + /// kResidueInPrologue + true, + /// kLaunchBounds + false + > {}; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -147,7 +153,6 @@ struct HgemmTileTraitsHelperA GemmConfig_::kScalarsPerLdgA> GlobalTileTraits; - /// The skew. static int const kSkewA = 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2; /// The traits class to build the iterator to store data to shared memory for A^T. @@ -215,7 +220,6 @@ struct HgemmTileTraitsHelperB GemmConfig_::kScalarsPerLdgB> GlobalTileTraits; - /// The skew for B. static int const kSkewB = 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2; /// The traits class to build the iterator to store data to shared memory for B^N. 
@@ -266,8 +270,8 @@ template < typename OutputTile_, /// The functor to do the math in the epilogue. typename EpilogueFunctor_, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_ = Shape<8, 8, 16>, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_, /// The number of halfs loaded in one LDG for A. int kScalarsPerLdgA_ = 2, /// The number of halfs loaded in one LDG for B. @@ -276,8 +280,7 @@ template < typename Index_ = int> struct HgemmTraitsHelper { /// The HGEMM config. - typedef HgemmConfig - GemmConfig; + typedef HgemmConfig GemmConfig; /// The GEMM config for A. typedef HgemmTileTraitsHelperA GemmTileTraitsHelperA; /// The GEMM config for B. @@ -296,7 +299,10 @@ struct HgemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorA; /// The stream to load A from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamA; /// The iterator to load B from global memory. @@ -312,7 +318,10 @@ struct HgemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorB; /// The stream to load B from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamB; /// The iterator to load A from shared memory @@ -354,8 +363,8 @@ template < typename OutputTile_ = Shape<8, 128, 128>, /// The functor to do the math in the epilogue. typename EpilogueFunctor_ = LinearScaling, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_ = Shape<8, 8, 16>, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_ = Shape<8, 8, 16>, /// The number of halfs loaded in one LDG for A. int kScalarsPerLdgA_ = 2, /// The number of halfs loaded in one LDG for B. 
@@ -367,7 +376,7 @@ template < kLayoutB_, OutputTile_, EpilogueFunctor_, - AccumulatorsPerThread_, + ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_> > diff --git a/cutlass/gemm/igemm_epilogue.h b/cutlass/gemm/igemm_epilogue.h index 0d6998031..2ad24f32c 100644 --- a/cutlass/gemm/igemm_epilogue.h +++ b/cutlass/gemm/igemm_epilogue.h @@ -28,13 +28,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/convert.h" +#include "cutlass/fragment.h" +#include "cutlass/gemm/gemm_global_stream.h" +#include "cutlass/gemm/gemm_shared_stream.h" +#include "cutlass/gemm/igemm_global_tile.h" +#include "cutlass/reshape_tile.h" +#include "cutlass/tile_iterator.h" namespace cutlass { namespace gemm { @@ -269,8 +269,8 @@ struct IgemmEpilogueTraits : public GemmEpilogueTraits< typename Helper_::SharedStoreIteratorD, // The shared store transformer for D. typename Helper_::SharedStoreTransformerD, - // The iterator to load D from shared memory. - typename Helper_::SharedLoadIteratorD, + // The stream to load D from shared memory. + typename Helper_::SharedLoadStreamD, // The iterations. typename Helper_::Iterations, // The strides between iterations. @@ -294,9 +294,8 @@ struct IgemmEpilogue : public GemmEpilogue { /// Ctor. 
CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_, typename Base::SharedStorage& shared_storage_, - typename Base::Index m_, - typename Base::Index n_) - : Base(params_, shared_storage_, m_, n_) {} + Coord<3> const& _problem_size) + : Base(params_, shared_storage_, _problem_size) {} }; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -309,9 +308,8 @@ struct IgemmEpilogue : public GemmEpilogue const& _problem_size) + : Base(params_, shared_storage_, _problem_size) {} }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/gemm/igemm_global_tile.h b/cutlass/gemm/igemm_global_tile.h index 3f594ac6a..7a9c1573a 100644 --- a/cutlass/gemm/igemm_global_tile.h +++ b/cutlass/gemm/igemm_global_tile.h @@ -32,9 +32,9 @@ */ #pragma once -#include -#include -#include +#include "cutlass/coord.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/matrix_traits.h" namespace cutlass { namespace gemm { @@ -67,10 +67,10 @@ struct IgemmGlobalTileTraits : public GemmGlobalTileTraits< /// The strides in each dimension between different loads/stores. typedef Shape Delta; /// The number of iterations needed to load/store the tile. - typedef Shape + Base::VectorizedTile::kW / Base::Threads::kW, + Base::VectorizedTile::kC / Base::kAccessSize> Iterations; /// Computes the thread offset in (H, W) based on thread ID @@ -86,24 +86,11 @@ struct IgemmGlobalTileTraits : public GemmGlobalTileTraits< public: /// The threads strides. - typedef Shape<1, 4, Base::Tile::kC> ThreadsDelta; + typedef Shape<1, 4, Base::VectorizedTile::kC> ThreadsDelta; }; //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Deprecated. Please use IgemmGlobalTileTraits instead. 
- -template -struct IgemmContiguousGlobalTileTraits - : public IgemmGlobalTileTraits {}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - template struct IgemmGlobalIteratorAb : public GemmGlobalIteratorAb { /// The base class. @@ -114,11 +101,11 @@ struct IgemmGlobalIteratorAb : public GemmGlobalIteratorAb /// Constructor. CUTLASS_DEVICE IgemmGlobalIteratorAb(typename Base::Params const& _params, const Coord<3>& bounds, - const Coord<3>& block, + const Coord<3>& threadblock_offset, ThreadOffset thread_offset_func = ThreadOffset()) - : Base(_params, bounds, block, thread_offset_func), in_residue_(false), mask_(0xffffffff) { + : Base(_params, bounds, threadblock_offset, thread_offset_func), mask_(0xffffffff) { // The number of elements read in a single iteration. - int const kBlock = TileTraits_::Tile::kW * TileTraits_::kAccessSize; + int const kBlock = TileTraits_::Tile::kW; // The residue. int const kResidue = (int)(bounds[1] % kBlock); @@ -129,28 +116,12 @@ struct IgemmGlobalIteratorAb : public GemmGlobalIteratorAb } } - /// The accessor. - CUTLASS_DEVICE void get(typename Base::AccessType& value, int d, int h, int w, int c) const { - Base::get(value, d, h, w, c); - if (in_residue_) { - reinterpret_cast(value) &= mask_; - } + CUTLASS_DEVICE void load_element( + typename Base::AccessType& value, int d, int h, int w, int c) const { + Base::load_element(value, d, h, w, c); + reinterpret_cast(value) &= mask_; } - /// Move to residue portion. - CUTLASS_DEVICE void move_to_residue(typename Base::Index k) { - Base::move_to_residue(k); - in_residue_ = true; - } - - /// Move back to the beginning of the first tile. - CUTLASS_DEVICE void rollback() { - Base::rollback(); - in_residue_ = false; - } - - /// Are we in the residue? - bool in_residue_; /// The mask to clean up the values. 
uint32_t mask_; }; diff --git a/cutlass/gemm/igemm_multiply_add.h b/cutlass/gemm/igemm_multiply_add.h index 5a8baec53..5ff6c7c1b 100644 --- a/cutlass/gemm/igemm_multiply_add.h +++ b/cutlass/gemm/igemm_multiply_add.h @@ -28,9 +28,9 @@ */ #pragma once -#include +#include "cutlass/fragment.h" -#include +#include "cutlass/gemm/thread_multiply_add.h" namespace cutlass { namespace gemm { @@ -38,16 +38,18 @@ namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// /// Template performing matrix multiply-add operation within a thread -template -struct ThreadMultiplyAdd { +template +struct ThreadMultiplyAdd { /// The shape of the instruction. typedef Shape<4, 1, 1> InstructionShape; - /// The number of accumulators per thread. - typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// Shape of the thread-level GEMM (K-by-N-by-M) + typedef ThreadGemmShape_ ThreadGemmShape; + /// Aliased for compatibility. Will be removed in CUTLASS v2.0 + typedef ThreadGemmShape AccumulatorsPerThread; /// The number of threads per warp. typedef ThreadsPerWarp_ ThreadsPerWarp; /// The number of accumulators per warp. - typedef typename ShapeMul::Shape AccumulatorsPerWarp; + typedef typename ShapeMul::Shape AccumulatorsPerWarp; /// The type for A. typedef int8_t ScalarA; /// The fragment for A. diff --git a/cutlass/gemm/igemm_swizzle.h b/cutlass/gemm/igemm_swizzle.h index 77cf7118d..fbb68d143 100644 --- a/cutlass/gemm/igemm_swizzle.h +++ b/cutlass/gemm/igemm_swizzle.h @@ -27,7 +27,7 @@ */ #pragma once -#include +#include "cutlass/fragment.h" namespace cutlass { namespace gemm { @@ -82,6 +82,11 @@ struct IgemmSwizzle { int a2 = src_int[i2]; int a3 = src_int[i3]; + // // DEBUG. 
+ // if (threadIdx.x == 0) { + // printf("a=0x%08x 0x%08x 0x%08x 0x%08x\n", a0, a1, a2, a3); + // } + int b0, b1, b2, b3, c0; asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(b0) : "r"(a0), "r"(a1)); asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(c0) : "r"(a2), "r"(a3)); @@ -99,6 +104,11 @@ struct IgemmSwizzle { asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(c0) : "r"(a2), "r"(a3)); asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b3) : "r"(b3), "r"(c0)); + // // DEBUG. + // if (threadIdx.x == 0) { + // printf("b=0x%08x 0x%08x 0x%08x 0x%08x\n", b0, b1, b2, b3); + // } + dst_int[i0] = b0; dst_int[i1] = b1; dst_int[i2] = b2; diff --git a/cutlass/gemm/igemm_traits.h b/cutlass/gemm/igemm_traits.h index 82f8de5cd..5bceeda92 100644 --- a/cutlass/gemm/igemm_traits.h +++ b/cutlass/gemm/igemm_traits.h @@ -29,18 +29,18 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/convert.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/gemm_epilogue.h" +#include "cutlass/gemm/gemm_epilogue_traits.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/gemm/gemm_shared_tile.h" +#include "cutlass/gemm/gemm_traits.h" +#include "cutlass/gemm/igemm_epilogue.h" +#include "cutlass/gemm/igemm_global_tile.h" +#include "cutlass/gemm/igemm_multiply_add.h" +#include "cutlass/gemm/igemm_swizzle.h" +#include "cutlass/reshape_tile.h" namespace cutlass { namespace gemm { @@ -52,49 +52,52 @@ template < typename OutputTile_, /// The output type. typename ScalarD_, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_> -struct IgemmConfig - : public GemmConfig< - /// The scalar type for A. - int8_t, - /// The scalar type for B. - int8_t, - /// The scalar type for C. - ScalarD_, - /// The scalar type for D. - ScalarD_, - /// The tile size for the GEMM KxNxM. - OutputTile_, - /// The functor to do the math in the main loop. 
- ThreadMultiplyAdd, int8_t, int8_t, int>, - /// The number of scalars per LDG for A. - 4, - /// The number of scalars per STS for A. - 4, - /// The number of scalars per LDS for A. - 16, - /// The number of scalars per LDG for B. - 4, - /// The number of scalars per STS for B. - 4, - /// The number of scalars per LDS for B. - 16, - /// The number of scalars per LDG for C and STG for D. - 1, - /// The number of scalars per STS for D. - 4, - /// The number of scalars per LDS for D. - 1, - /// The number of stages in shared memory. - 2, - /// Enable the code path that deals with the residue in epilogue. - true> {}; + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_> +struct IgemmConfig : public GemmConfig< + /// The scalar type for A. + int8_t, + /// The scalar type for B. + int8_t, + /// The scalar type for C. + ScalarD_, + /// The scalar type for D. + ScalarD_, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, int8_t, int8_t, int>, + /// The number of scalars per LDG for A. + 4, + /// The number of scalars per STS for A. + 4, + /// The number of scalars per LDS for A. + 16, + /// The number of scalars per LDG for B. + 4, + /// The number of scalars per STS for B. + 4, + /// The number of scalars per LDS for B. + 16, + /// The number of scalars per LDG for C and STG for D. + 1, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2, + /// kResidueSeparate + false, + /// kResidueInPrologue + false, + /// kLaunchBounds + false> {}; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct IgemmConfig +template +struct IgemmConfig : public GemmConfig< /// The scalar type for A. int8_t, @@ -107,7 +110,7 @@ struct IgemmConfig /// The tile size for the GEMM KxNxM. 
OutputTile_, /// The functor to do the math in the main loop. - ThreadMultiplyAdd, int8_t, int8_t, int>, + ThreadMultiplyAdd, int8_t, int8_t, int>, /// The number of scalars per LDG for A. 4, /// The number of scalars per STS for A. @@ -128,8 +131,12 @@ struct IgemmConfig 4, /// The number of stages in shared memory. 2, - /// Enable the code path that deals with the residue in epilogue. - true> {}; + /// If true, separate mainloop is instantiated from residue + false, + /// Compute residue in prologue? + true, + /// Launch bounds? + false> {}; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -162,7 +169,7 @@ struct IgemmTileTraitsHelperA GemmConfig_::kScalarsPerLdgA> GlobalTileTraits; - // The iterator. + /// The global load iterator. typedef GemmGlobalIteratorAb GlobalLoadIterator; /// The traits class to build the iterator to store data to shared memory for A^N. @@ -208,7 +215,7 @@ struct IgemmTileTraitsHelperA { GemmConfig_::kScalarsPerLdgA> GlobalTileTraits; - // The iterator. + /// The global load iterator. typedef IgemmGlobalIteratorAb GlobalLoadIterator; /// The traits class to build the iterator to store data to shared memory for A^N. @@ -281,7 +288,7 @@ struct IgemmTileTraitsHelperB { GemmConfig_::kScalarsPerLdgB> GlobalTileTraits; - // The iterator. + /// The global load iterator. typedef IgemmGlobalIteratorAb GlobalLoadIterator; /// The traits class to build the iterator to store data to shared memory for B^N. @@ -345,7 +352,7 @@ struct IgemmTileTraitsHelperB GemmConfig_::kScalarsPerLdgB> GlobalTileTraits; - // The iterator. + /// The global load iterator. typedef GemmGlobalIteratorAb GlobalLoadIterator; /// The traits class to build the iterator to store data to shared memory for B^N. @@ -404,13 +411,13 @@ template < typename ScalarD_, /// The functor to do the math in the epilogue. typename EpilogueFunctor_, - /// The number of accumulators per thread. 
- typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_ = Shape<32, 8, 8>, /// The index. typename Index_ = int> struct IgemmTraitsHelper { /// The IGEMM config. - typedef IgemmConfig GemmConfig; + typedef IgemmConfig GemmConfig; /// The GEMM config for A. typedef IgemmTileTraitsHelperA GemmTileTraitsHelperA; /// The GEMM config for B. @@ -418,7 +425,6 @@ struct IgemmTraitsHelper { /// The iterator to load A from global memory. typedef typename GemmTileTraitsHelperA::GlobalLoadIterator GlobalLoadIteratorA; - /// The default transformer for A. typedef typename IgemmTransformerA::Transformer GlobalTransformerA; @@ -429,12 +435,14 @@ struct IgemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorA; /// The stream to load A from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamA; /// The iterator to load B from global memory. typedef typename GemmTileTraitsHelperB::GlobalLoadIterator GlobalLoadIteratorB; - // The default transformer for B. typedef typename IgemmTransformerB::Transformer GlobalTransformerB; @@ -445,7 +453,10 @@ struct IgemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorB; /// The stream to load B from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamB; /// The iterator to load A from shared memory. @@ -501,8 +512,8 @@ template < typename ScalarD_ = int, /// The functor to do the math in the epilogue. typename EpilogueFunctor_ = LinearScaling::Scalar>, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_ = Shape<32, 8, 8>, /// The index. typename Index_ = int, /// The helper class. 
@@ -511,7 +522,7 @@ template < OutputTile_, ScalarD_, EpilogueFunctor_, - AccumulatorsPerThread_, + ThreadGemmShape_, Index_> > struct IgemmTraits : public GemmTraits< // The config. diff --git a/cutlass/gemm/linear_scaling.h b/cutlass/gemm/linear_scaling.h index 979c93f96..a12fc5f19 100644 --- a/cutlass/gemm/linear_scaling.h +++ b/cutlass/gemm/linear_scaling.h @@ -1,3 +1,4 @@ + /*************************************************************************************************** * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. * @@ -27,18 +28,31 @@ */ #pragma once -#include +#include "cutlass/fragment_multiply_add.h" namespace cutlass { namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// +template +CUTLASS_DEVICE bool is_zero(T x) { + return x == T(0); +} + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) +CUTLASS_DEVICE bool is_zero(half x) { return reinterpret_cast(x) == int16_t(0); } +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// Functor to compute linear combination of fragments -template > +template > struct LinearScaling { // The scalar. typedef Scalar_ Scalar; + // The accumulator Type + typedef typename FragmentMultiplyAdd_::ScalarAccum ScalarAccum; // The adapater. typedef FragmentMultiplyAdd_ FragmentMultiplyAdd; @@ -47,6 +61,21 @@ struct LinearScaling { /// The alpha/beta scaling params. Scalar alpha, beta; + // + // Methods + // + + // Constructor + CUTLASS_HOST_DEVICE + Params(Scalar _alpha = 0, Scalar _beta = 0) : alpha(_alpha), beta(_beta) {} + + /// Initialize the parameters + CUTLASS_HOST_DEVICE int initialize(Scalar _alpha, Scalar _beta) { + alpha = _alpha; + beta = _beta; + return 0; + } + /// Initialize the parameters. 
template CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { @@ -56,14 +85,53 @@ struct LinearScaling { } }; + // + // Data members + // + + Params params; + + // + // Methods + // + /// Ctor. - CUTLASS_DEVICE LinearScaling(Params const& params) : alpha(params.alpha), beta(params.beta) {} + CUTLASS_DEVICE LinearScaling() { } + + /// Ctor. + CUTLASS_DEVICE LinearScaling(Params const& _params) : params(_params) {} + + /// Method to determine whether the source accumulator matrix C is ever needed. This method + /// may always safely return true, though better performance is possible if the source accumulator + /// matrix is never loaded unnecessarily. + CUTLASS_DEVICE + bool source_required() const { + return !is_zero(params.beta); + } /// Evaluate the functor. template CUTLASS_DEVICE void evaluate(FragmentA_ const& accum, FragmentB_& output) { FragmentMultiplyAdd mad; - mad.multiply(alpha, accum, output); + mad.multiply(params.alpha, accum, output); + + } + + /// Evaluate the functor, without using fragment in the API + template + CUTLASS_DEVICE void evaluate(ScalarAccum const *accum, ScalarOutput *output) { + Fragment FragAccum; + Fragment FragOutput; +#pragma unroll + for (int i = 0; i < size; i++) { + FragAccum[i] = accum[i]; + FragOutput[i] = output[i]; + } + evaluate(FragAccum, FragOutput); +#pragma unroll + for (int i = 0; i < size; i++) { + output[i] = FragOutput[i]; + } } /// Evaluate the functor. @@ -71,12 +139,28 @@ struct LinearScaling { CUTLASS_DEVICE void evaluate(FragmentA_ const& accum, FragmentB_ const& old, FragmentB_& output) { FragmentMultiplyAdd mad; FragmentB_ tmp; - mad.multiply(beta, old, tmp); - mad.multiply_add(alpha, accum, tmp, output); + mad.multiply(params.beta, old, tmp); + mad.multiply_add(params.alpha, accum, tmp, output); } - /// The alpha/beta scaling factors. 
- Scalar alpha, beta; + /// Evaluate the functor, without using fragment in the API + template + CUTLASS_DEVICE void evaluate(ScalarAccum const *accum, ScalarOutput const *old, ScalarOutput *output) { + Fragment FragAccum; + Fragment FragOutput; + Fragment FragOld; +#pragma unroll + for (int i = 0; i < size; i++) { + FragAccum[i] = accum[i]; + FragOutput[i] = output[i]; + FragOld[i] = old[i]; + } + evaluate(FragAccum, FragOld, FragOutput); +#pragma unroll + for (int i = 0; i < size; i++) { + output[i] = FragOutput[i]; + } + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/gemm/linear_scaling_device_ptr.h b/cutlass/gemm/linear_scaling_device_ptr.h new file mode 100644 index 000000000..5dc845da4 --- /dev/null +++ b/cutlass/gemm/linear_scaling_device_ptr.h @@ -0,0 +1,149 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements the BLAS linear scaling function alpha*AB + beta*C +*/ +#pragma once + +#include "cutlass/cutlass.h" +#include "cutlass/gemm/scalar_or_pointer.h" +#include "cutlass/gemm/linear_scaling.h" + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Functor to compute linear combination of fragments. This is intended to support passing scalars +/// either by value from the host or by reference to device-side scalar elements. This is inspired +/// by cuBLAS's device pointer mode. +template > +struct LinearScalingDevicePtr : public LinearScaling { + + /// Linear Scaling class used + typedef LinearScaling Base; + + // The scalar. + typedef typename Base::Scalar Scalar; + + /// The parameters. 
+ class Params {
+ private:
+ /// Alpha scalar
+ detail::ScalarOrPointer alpha_;
+
+ /// Beta scalar
+ detail::ScalarOrPointer beta_;
+
+ public:
+ //
+ // Methods
+ //
+
+ // Constructor
+ CUTLASS_HOST_DEVICE
+ Params() {}
+
+ // Constructor
+ CUTLASS_HOST_DEVICE
+ Params(
+ Scalar alpha,
+ Scalar beta
+ ):
+ alpha_(alpha),
+ beta_(beta) {}
+
+ // Constructor
+ CUTLASS_HOST_DEVICE
+ Params(
+ Scalar const *alpha_ptr,
+ Scalar const *beta_ptr
+ ):
+ alpha_(alpha_ptr),
+ beta_(beta_ptr) {}
+
+ /// Initialize the parameters
+ CUTLASS_HOST_DEVICE int initialize(
+ Scalar alpha,
+ Scalar beta) {
+
+ alpha_ = alpha;
+ beta_ = beta;
+
+ return 0;
+ }
+
+ /// Initialize the parameters
+ CUTLASS_HOST_DEVICE int initialize(
+ Scalar const *alpha,
+ Scalar const *beta) {
+
+ alpha_ = alpha;
+ beta_ = beta;
+
+ return 0;
+ }
+
+ /// Initialize the parameters.
+ template
+ CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
+
+ alpha_ = desc.alpha;
+ beta_ = desc.beta;
+
+ return 0;
+ }
+
+ /// Gets the alpha scalar
+ CUTLASS_HOST_DEVICE
+ Scalar alpha() const {
+ return alpha_;
+ }
+
+ /// Gets the beta scalar
+ CUTLASS_HOST_DEVICE
+ Scalar beta() const {
+ return beta_;
+ }
+ };
+
+ //
+ // Methods
+ //
+
+ /// Ctor.
+ CUTLASS_HOST_DEVICE LinearScalingDevicePtr(Params const& _params) {
+ this->params.alpha = _params.alpha();
+ this->params.beta = _params.beta();
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+} // namespace gemm
+} // namespace cutlass
diff --git a/cutlass/gemm/scalar_or_pointer.h b/cutlass/gemm/scalar_or_pointer.h
new file mode 100644
index 000000000..7c4b4b75d
--- /dev/null
+++ b/cutlass/gemm/scalar_or_pointer.h
@@ -0,0 +1,129 @@
+
+/***************************************************************************************************
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements the BLAS linear scaling function alpha*AB + beta*C +*/ +#pragma once + +#include "cutlass/cutlass.h" + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail { + +/// Helper class defines an object which operates as either a scalar or a pointer. 
If the pointer +/// is non-null, it is dereferenced when the object is accessed. +template +class ScalarOrPointer { +public: + /// Underlying scalar type + typedef Scalar_ Scalar; + +private: + // + // Data members + // + + /// Scalar value + Scalar scalar; + + /// Pointer to use if non null + Scalar const *ptr; + +public: + + // + // Methods + // + + /// Default ctor + CUTLASS_HOST_DEVICE + ScalarOrPointer(): scalar(0), ptr(nullptr) {} + + /// Object behaves as a scalar + CUTLASS_HOST_DEVICE + ScalarOrPointer(Scalar const &val): scalar(val), ptr(nullptr) {} + + /// Object behaves as a scalar + CUTLASS_HOST_DEVICE + ScalarOrPointer(Scalar const *ptr_): scalar(0), ptr(ptr_) {} + + /// Returns true if is pointer + CUTLASS_HOST_DEVICE + bool is_pointer() const { + return bool(ptr); + } + + /// Gets the pointer value + CUTLASS_HOST_DEVICE + Scalar const *get_ptr() const { + return ptr; + } + + /// Gets the pointer value + CUTLASS_HOST_DEVICE + Scalar get_scalar() const { + return scalar; + } + + /// Assigns to a scalar and sets pointer to nullptr + CUTLASS_HOST_DEVICE + ScalarOrPointer &operator=(Scalar const &scalar_) { + scalar = scalar_; + ptr = nullptr; + return *this; + } + + /// Assigns to a pointer value + CUTLASS_HOST_DEVICE + ScalarOrPointer &operator=(Scalar const *ptr_) { + ptr = ptr_; + return *this; + } + + /// Access the element + CUTLASS_HOST_DEVICE + Scalar get() const { + if (ptr) { + return *ptr; + } + return scalar; + } + + /// Accesses the element + CUTLASS_HOST_DEVICE + operator Scalar() const { + return get(); + } +}; + +} // namespace detail + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/gemm/sgemm_traits.h b/cutlass/gemm/sgemm_traits.h index 66b767748..8ce7f58e2 100644 --- a/cutlass/gemm/sgemm_traits.h +++ b/cutlass/gemm/sgemm_traits.h @@ -27,13 +27,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include 
+#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/gemm_epilogue.h" +#include "cutlass/gemm/gemm_epilogue_traits.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/gemm/gemm_shared_tile.h" +#include "cutlass/gemm/gemm_traits.h" +#include "cutlass/gemm/thread_multiply_add.h" namespace cutlass { namespace gemm { @@ -43,46 +43,53 @@ namespace gemm { template < /// The tile size for the GEMM KxNxM. typename OutputTile_, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_, /// The number of scalars per LDG for A. int kScalarsPerLdgA_ = 1, /// The number of scalars per LDG for B. - int kScalarsPerLdgB_ = 1> -struct SgemmConfig - : public GemmConfig< - /// The scalar type for A. - float, - /// The scalar type for B. - float, - /// The scalar type for C. - float, - /// The scalar type for D. - float, - /// The tile size for the GEMM KxNxM. - OutputTile_, - /// The functor to do the math in the main loop. - ThreadMultiplyAdd, float, float, float>, - /// The number of scalars per LDG for A. - kScalarsPerLdgA_, - /// The number of scalars per STS for A. - kScalarsPerLdgA_, - /// The number of scalars per LDS for A. - 4, - /// The number of scalars per LDG for B. - kScalarsPerLdgB_, - /// The number of scalars per STS for B. - kScalarsPerLdgB_, - /// The number of scalars per LDS for B. - 4, - /// The number of scalars per LDG for C and STG for D. - 1, - /// The number of scalars per STS for D. - 4, - /// The number of scalars per LDS for D. - 1, - /// The number of stages in shared memory. - 2> {}; + int kScalarsPerLdgB_ = 1, + /// Whether to specify launch bounds + bool kLaunchBounds = true> +struct SgemmConfig : public GemmConfig< + /// The scalar type for A. + float, + /// The scalar type for B. + float, + /// The scalar type for C. + float, + /// The scalar type for D. + float, + /// The tile size for the GEMM KxNxM. 
+ OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, float, float, float>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 4, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 4, + /// The number of scalars per LDG for C and STG for D. + 1, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2, + /// kResidueSeparate + false, + /// kResidueInPrologue + true, + /// kLaunchBounds + kLaunchBounds> {}; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -95,8 +102,8 @@ template < typename OutputTile_ = Shape<8, 128, 128>, /// The functor to use in the epilogue. typename EpilogueFunctor_ = LinearScaling, - /// The number of accumulators per thread. - typename AccumulatorsPerThread_ = Shape<8, 8, 8>, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_ = Shape<8, 8, 8>, /// The number of floats loaded in one LDG for A. int kScalarsPerLdgA_ = 1, /// The number of floats loaded in one LDG for B. @@ -105,7 +112,7 @@ template < typename Index_ = int, /// The SGEMM config. typename GemmConfig_ = - SgemmConfig, + SgemmConfig, /// The traits class for the epilogue. typename GemmEpilogueTraits_ = SimplifiedGemmEpilogueTraits > @@ -123,5 +130,43 @@ struct SgemmTraits : public SimplifiedGemmTraits< //////////////////////////////////////////////////////////////////////////////////////////////////// +/// Helper to define SGEMM traits using Launch Bounds +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. 
+ typename OutputTile_ = Shape<8, 128, 128>, + /// The functor to use in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// Tile size for thread-level GEMM (K-by-N-by-M) + typename ThreadGemmShape_ = Shape<8, 8, 8>, + /// The number of floats loaded in one LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of floats loaded in one LDG for B. + int kScalarsPerLdgB_ = 1, + /// The index. + typename Index_ = int, + /// The SGEMM config. + typename GemmConfig_ = + SgemmConfig, + /// The traits class for the epilogue. + typename GemmEpilogueTraits_ = + SimplifiedGemmEpilogueTraits > +struct SgemmLBTraits : public SimplifiedGemmTraits< + // The layout for A. + kLayoutA_, + // The layout for B. + kLayoutB_, + // The config. + GemmConfig_, + // The epilogue. + GemmEpilogue, + // The index. + Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + } // namespace gemm } // namespace cutlass diff --git a/cutlass/gemm/thread_multiply_add.h b/cutlass/gemm/thread_multiply_add.h index 20dca1596..b95dee58a 100644 --- a/cutlass/gemm/thread_multiply_add.h +++ b/cutlass/gemm/thread_multiply_add.h @@ -27,7 +27,7 @@ */ #pragma once -#include +#include "cutlass/fragment.h" namespace cutlass { namespace gemm { @@ -35,20 +35,23 @@ namespace gemm { //////////////////////////////////////////////////////////////////////////////////////////////////// /// Template performing matrix multiply-add operation within a thread -template + typename ScalarC_, + MatrixLayout::Kind kLayout_ = MatrixLayout::kColumnMajor> struct ThreadMultiplyAdd { /// The shape of the instruction. typedef Shape<1, 1, 1, 1> InstructionShape; - /// The number of accumulators per thread. - typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// The shape of a thread-leveel matrix multiply accumulate. + typedef ThreadGemmShape_ ThreadGemmShape; + /// Aliased to "AccumulatorsPerThread" for compatibility. 
Expect to be renamed in CUTLASS v2.0 + typedef ThreadGemmShape AccumulatorsPerThread; /// The number of threads per warp. typedef ThreadsPerWarp_ ThreadsPerWarp; /// The number of accumulators per warp. - typedef typename ShapeMul::Shape AccumulatorsPerWarp; + typedef typename ShapeMul::Shape AccumulatorsPerWarp; /// The type for A. typedef ScalarA_ ScalarA; /// The fragment for A. @@ -70,9 +73,18 @@ struct ThreadMultiplyAdd { FragmentB const& b, Accumulators const& c, Accumulators& d) { - for (int j = 0; j < AccumulatorsPerThread::kH; ++j) { - for (int i = 0; i < AccumulatorsPerThread::kW; ++i) { - d[j * AccumulatorsPerThread::kW + i] = a[i] * b[j] + c[j * AccumulatorsPerThread::kW + i]; + if(kLayout_ == MatrixLayout::kColumnMajor) { + for (int j = 0; j < AccumulatorsPerThread::kH; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW; ++i) { + d[j * AccumulatorsPerThread::kW + i] = a[i] * b[j] + c[j * AccumulatorsPerThread::kW + i]; + } + } + } + else { + for(int i = 0; i < AccumulatorsPerThread::kW; ++i) { + for(int j = 0; j < AccumulatorsPerThread::kH; ++j) { + d[i * AccumulatorsPerThread::kH + j] = a[i] * b[j] + c[i * AccumulatorsPerThread::kH + j]; + } } } } diff --git a/cutlass/gemm/threadblock_swizzle.h b/cutlass/gemm/threadblock_swizzle.h new file mode 100644 index 000000000..fe7a3be7f --- /dev/null +++ b/cutlass/gemm/threadblock_swizzle.h @@ -0,0 +1,387 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*! \file
+ \brief Defines functors for mapping blockIdx to partitions of the GEMM computation.
+*/
+#pragma once
+
+#include "cutlass/coord.h"
+#include "cutlass/gemm/gemm_coord.h"
+
+namespace cutlass {
+namespace gemm {
+
+struct swizzleDirection {
+ enum Kind { Boustrophedon, OneDirection };
+};
+// helper template function
+template
+CUTLASS_DEVICE int getLinearIdx(int groups) {
+ // groupCols is not needed for OneDirection Swizzle
+ return blockIdx.y * gridDim.x + blockIdx.x;
+}
+template <>
+CUTLASS_DEVICE int getLinearIdx(int groups) {
+ // reverse blockIdx.x for some columns
+ if ((blockIdx.y / groups) % 2 == 1)
+ return blockIdx.y * gridDim.x + (gridDim.x - blockIdx.x - 1);
+ else
+ return blockIdx.y * gridDim.x + blockIdx.x;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*!@defgroup IdentityBlockSwizzle Identity Block Swizzle
+@{
+ Block Swizzle provides the mapping logic between a block in the physical memory of Matrix C and
+Thread Block
+ Identity Block Swizzle effectively maps blocks in leading dimension order (column major) with
+thread block
+ in leading dimension order (blockIdx.x)
+ blockIdx.z is mapped with batch_count for batched GEMM
+@}
+*/
+struct IdentityBlockSwizzle {
+ /// Ctor. aka ColumnMajorBlockSwizzle<1>
+ CUTLASS_HOST_DEVICE IdentityBlockSwizzle() {}
+
+ /// Swizzle the block index.
+ CUTLASS_DEVICE dim3 swizzle() { return blockIdx; } + + /// + CUTLASS_HOST_DEVICE dim3 get_grid_layout(GemmCoord const &problem_size, + Coord<3> const &OutputTile) { + /*OutputTile and problem_size are both in KNM order*/ + dim3 grid; + grid.x = (problem_size.m() + OutputTile[2] - 1) / OutputTile[2]; + grid.y = (problem_size.n() + OutputTile[1] - 1) / OutputTile[1]; + grid.z = problem_size.batch(); + return grid; + } + + /// + CUTLASS_DEVICE Coord<3> get_threadblock_offset(Coord<3> const &OutputTile) { + dim3 block = swizzle(); + Coord<3> threadblock_offset = + make_Coord(0, block.y * OutputTile[1], block.x * OutputTile[2]); + return threadblock_offset; + } + + /// + CUTLASS_DEVICE int get_batch_id() { + dim3 block = swizzle(); + return block.z; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/* +ColumnMajorBlockSwizzle<1, OneDirection> is equivalent with IdentityBlockSwizzle +groupCols has the effect of controlling the schedulling of thread blocks +settings with different groupCols can contribute to the overall performance by affecting L2 cache +hit rate + +consider a regular thread block mapping btween matrix C and different thread blocks +note that C is column major, and the leading dimension of thread block id is blockIdx.x + +let's look at an example where gridIdx.x = 6, gridIdx.y = 7, gridIdx.z = 1 +(blockIdx.x, blockIdx.y) +mapping between threadblockID and C matrix: +------------------------------------------------------- +(0,0) | (0,1) | (0,2) | (0,3) | (0,4) | (0,5) | (0,6) | +------------------------------------------------------- +(1,0) | (1,1) | (1,2) | (1,3) | (1,4) | (1,5) | (1,6) | +------------------------------------------------------- +(2,0) | (2,1) | (2,2) | (2,3) | (2,4) | (2,5) | (2,6) | +------------------------------------------------------- +(3,0) | (3,1) | (3,2) | (3,3) | (3,4) | (3,5) | (3,6) | +------------------------------------------------------- +(4,0) | (4,1) | 
(4,2) | (4,3) | (4,4) | (4,5) | (4,6) | +------------------------------------------------------- +(5,0) | (5,1) | (5,2) | (5,3) | (5,4) | (5,5) | (5,6) | +------------------------------------------------------- + +A ColumnMajorBlockSwizzle<1, OneDirection> will imply the above order where threadblocks are +launched in a column major + +A ColumnMajorBlockSwizzle<2, OneDirection> swizzles things a little, +------------------------------------------------------- +(0,0) | (3,0) | (0,2) | (3,2) | (0,4) | (3,4) | (0,6) | +------------------------------------------------------- +(0,1) | (3,1) | (0,3) | (3,3) | (0,5) | (3,5) | (1,6) | +------------------------------------------------------- +(1,0) | (4,0) | (1,2) | (4,2) | (1,4) | (4,4) | (2,6) | +------------------------------------------------------- +(1,1) | (4,1) | (1,3) | (4,3) | (1,5) | (4,5) | (3,6) | +------------------------------------------------------- +(2,0) | (5,0) | (2,2) | (5,2) | (2,4) | (5,4) | (4,6) | +------------------------------------------------------- +(2,1) | (5,1) | (2,3) | (5,3) | (2,5) | (5,5) | (5,6) | +------------------------------------------------------- + +so in memory, it would apprear that we work on 2 columns at a time rather than 1 +Note that the index here really represent how each block maps to memory + +A ColumnMajorBlockSwizzle<1, Boustrophedon> is similar to ColumnMajorBlockSwizzle<1, OneDirection> +except that every column flips the ordering against the previous one +------------------------------------------------------- +(0,0) | (5,1) | (0,2) | (5,3) | (0,4) | (5,5) | (0,6) | +------------------------------------------------------- +(1,0) | (4,1) | (1,2) | (4,3) | (1,4) | (4,5) | (1,6) | +------------------------------------------------------- +(2,0) | (3,1) | (2,2) | (3,3) | (2,4) | (3,5) | (2,6) | +------------------------------------------------------- +(3,0) | (2,1) | (3,2) | (2,3) | (3,4) | (2,5) | (3,6) | +------------------------------------------------------- +(4,0) | 
(1,1) | (4,2) | (1,3) | (4,4) | (1,5) | (4,6) | +------------------------------------------------------- +(5,0) | (0,1) | (5,2) | (0,3) | (5,4) | (0,5) | (5,6) | +------------------------------------------------------- + +similarily, A ColumnMajorBlockSwizzle<2, Boustrophedon> looks like +------------------------------------------------------- +(0,0) | (3,0) | (2,3) | (5,3) | (0,4) | (3,4) | (5,6) | +------------------------------------------------------- +(0,1) | (3,1) | (2,2) | (5,2) | (0,5) | (3,5) | (4,6) | +------------------------------------------------------- +(1,0) | (4,0) | (1,3) | (4,3) | (1,4) | (4,4) | (3,6) | +------------------------------------------------------- +(1,1) | (4,1) | (1,2) | (4,2) | (1,5) | (4,5) | (2,6) | +------------------------------------------------------- +(2,0) | (5,0) | (0,3) | (3,3) | (2,4) | (5,4) | (1,6) | +------------------------------------------------------- +(2,1) | (5,1) | (0,2) | (3,2) | (2,5) | (5,5) | (0,6) | +------------------------------------------------------- + +*/ + +template +struct ColumnMajorBlockSwizzle { + /// Ctor. + CUTLASS_HOST_DEVICE ColumnMajorBlockSwizzle() {} + + /// Swizzle the block index. 
+ CUTLASS_DEVICE dim3 swizzle() {
+ assert(gridDim.z == 1);
+ int linearIdx = getLinearIdx(groupCols);
+ dim3 swizzledBlockIdx;
+ int currGroupCols = groupCols;
+ int prevGroupCols = groupCols;
+
+ if ((gridDim.y % groupCols != 0) && ((blockIdx.y + (gridDim.y % groupCols)) >= gridDim.y)) {
+ // last columns if gridDim.y is not divisible by groupCols
+ currGroupCols = gridDim.y % groupCols;
+ }
+
+ swizzledBlockIdx.x = (linearIdx / currGroupCols) % gridDim.x;
+ swizzledBlockIdx.y =
+ linearIdx % currGroupCols + prevGroupCols * (linearIdx / (prevGroupCols * gridDim.x));
+ swizzledBlockIdx.z = blockIdx.z;
+
+ return swizzledBlockIdx;
+ }
+
+ ///
+ CUTLASS_HOST_DEVICE dim3 get_grid_layout(GemmCoord const &problem_size,
+ Coord<3> const &OutputTile) {
+ dim3 grid;
+ grid.x = (problem_size.m() + OutputTile[2] - 1) / OutputTile[2];
+ grid.y = (problem_size.n() + OutputTile[1] - 1) / OutputTile[1];
+ grid.z = problem_size.batch();
+ return grid;
+ }
+
+ ///
+ CUTLASS_DEVICE Coord<3> get_threadblock_offset(Coord<3> const &OutputTile) {
+ dim3 block = swizzle();
+ Coord<3> threadblock_offset =
+ make_Coord(0, block.y * OutputTile[1], block.x * OutputTile[2]);
+ return threadblock_offset;
+ }
+
+ ///
+ CUTLASS_DEVICE int get_batch_id() {
+ dim3 block = swizzle();
+ return block.z;
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*
+
+consider a regular thread block mapping between matrix C and different thread blocks
+note that C is column major, and the leading dimension of thread block id is blockIdx.x
+
+let's look at an example where gridIdx.x = 6, gridIdx.y = 7, gridIdx.z = 1
+(blockIdx.x, blockIdx.y)
+mapping between threadblockID and C matrix:
+-------------------------------------------------------
+(0,0) | (0,1) | (0,2) | (0,3) | (0,4) | (0,5) | (0,6) |
+-------------------------------------------------------
+(1,0) | (1,1) | (1,2) | (1,3) | (1,4) | (1,5) | (1,6) |
+------------------------------------------------------- +(2,0) | (2,1) | (2,2) | (2,3) | (2,4) | (2,5) | (2,6) | +------------------------------------------------------- +(3,0) | (3,1) | (3,2) | (3,3) | (3,4) | (3,5) | (3,6) | +------------------------------------------------------- +(4,0) | (4,1) | (4,2) | (4,3) | (4,4) | (4,5) | (4,6) | +------------------------------------------------------- +(5,0) | (5,1) | (5,2) | (5,3) | (5,4) | (5,5) | (5,6) | +------------------------------------------------------- + +A RowMajorBlockSwizzle<1, OneDirection> will effectively transpose the map + +----------------------------------------------- +(0,0) | (1,0) | (2,0) | (3,0) | (4,0) | (5,0) | +----------------------------------------------- +(0,1) | (1,1) | (2,1) | (3,1) | (4,1) | (5,1) | +----------------------------------------------- +(0,2) | (1,2) | (2,2) | (3,2) | (4,2) | (5,2) | +----------------------------------------------- +(0,3) | (1,3) | (2,3) | (3,3) | (4,3) | (5,3) | +----------------------------------------------- +(0,4) | (1,4) | (2,4) | (3,4) | (4,4) | (5,4) | +--------------------------------------------- +(0,5) | (1,5) | (2,5) | (3,5) | (4,5) | (5,5) | +----------------------------------------------- +(0,6) | (1,6) | (2,6) | (3,6) | (4,6) | (5,6) | +----------------------------------------------- + +It would aprear in memory we are working on 1 row at a time + +A ColumnMajorBlockSwizzle<2, OneDirection> swizzles things a little bit more +----------------------------------------------- +(0,0) | (1,3) | (2,0) | (3,3) | (4,0) | (5,3) | +----------------------------------------------- +(1,0) | (0,4) | (3,0) | (2,4) | (5,0) | (4,4) | +----------------------------------------------- +(0,1) | (1,4) | (2,1) | (3,4) | (4,1) | (5,4) | +----------------------------------------------- +(1,1) | (0,5) | (3,1) | (2,5) | (5,1) | (4,5) | +----------------------------------------------- +(0,2) | (1,5) | (2,2) | (3,5) | (4,2) | (5,5) | 
+--------------------------------------------- +(1,2) | (0,6) | (3,2) | (2,6) | (5,2) | (4,6) | +----------------------------------------------- +(0,3) | (1,6) | (2,3) | (3,6) | (4,3) | (5,6) | +----------------------------------------------- + +so in memory, it would apprear that we work on 2 rows at a time rather than 1 row +Note that the index here really represent how each block maps to memory + +A RowMajorBlockSwizzle<1, Boustrophedon> is similar to RowMajorBlockSwizzle<1, OneDirection> +except that every column flips the ordering against the previous one + +----------------------------------------------- +(0,0) | (1,6) | (2,0) | (3,6) | (4,0) | (5,6) | +----------------------------------------------- +(0,1) | (1,5) | (2,1) | (3,5) | (4,1) | (5,5) | +----------------------------------------------- +(0,2) | (1,4) | (2,2) | (3,4) | (4,2) | (5,4) | +----------------------------------------------- +(0,3) | (1,3) | (2,3) | (3,3) | (4,3) | (5,3) | +----------------------------------------------- +(0,4) | (1,2) | (2,4) | (3,2) | (4,4) | (5,2) | +--------------------------------------------- +(0,5) | (1,1) | (2,5) | (3,1) | (4,5) | (5,1) | +----------------------------------------------- +(0,6) | (1,0) | (2,6) | (3,0) | (4,6) | (5,0) | +----------------------------------------------- + +similarily, A RowMajorBlockSwizzle<2, Boustrophedon> looks like +----------------------------------------------- +(0,0) | (1,3) | (2,3) | (3,6) | (4,0) | (5,3) | +----------------------------------------------- +(1,0) | (0,4) | (3,2) | (2,6) | (5,0) | (4,4) | +----------------------------------------------- +(0,1) | (1,4) | (2,2) | (3,5) | (4,1) | (5,4) | +----------------------------------------------- +(1,1) | (0,5) | (3,1) | (2,5) | (5,1) | (4,5) | +----------------------------------------------- +(0,2) | (1,5) | (2,1) | (3,4) | (4,2) | (5,5) | +--------------------------------------------- +(1,2) | (0,6) | (3,0) | (2,4) | (5,2) | (4,6) | 
+----------------------------------------------- +(0,3) | (1,6) | (2,0) | (3,3) | (4,3) | (5,6) | +----------------------------------------------- + +*/ + +template +struct RowMajorBlockSwizzle { + /// Ctor. + CUTLASS_HOST_DEVICE RowMajorBlockSwizzle() {} + + /// Swizzle the block index. + CUTLASS_DEVICE dim3 swizzle() { + assert(gridDim.z == 1); + int linearIdx = getLinearIdx(groupRows); + dim3 swizzledBlockIdx; + int currGroupRows = groupRows; + int prevGroupRows = groupRows; + + if ((gridDim.y % groupRows != 0) && ((blockIdx.y + (gridDim.y % groupRows)) >= gridDim.y)) { + // last columns + currGroupRows = gridDim.y % groupRows; + } + + swizzledBlockIdx.x = + linearIdx % currGroupRows + prevGroupRows * (linearIdx / (prevGroupRows * gridDim.x)); + swizzledBlockIdx.y = (linearIdx / currGroupRows) % gridDim.x; + swizzledBlockIdx.z = blockIdx.z; + + return swizzledBlockIdx; + } + + /// + CUTLASS_HOST_DEVICE dim3 get_grid_layout(GemmCoord const &problem_size, + Coord<3> const &OutputTile) { + dim3 grid; + grid.x = (problem_size.n() + OutputTile[1] - 1) / OutputTile[1]; + grid.y = (problem_size.m() + OutputTile[2] - 1) / OutputTile[2]; + grid.z = problem_size.batch(); + return grid; + } + + /// + CUTLASS_DEVICE Coord<3> get_threadblock_offset(Coord<3> const &OutputTile) { + dim3 block = swizzle(); + Coord<3> threadblock_offset = + make_Coord(0, block.y * OutputTile[1], block.x * OutputTile[2]); + return threadblock_offset; + } + + /// + CUTLASS_DEVICE int get_batch_id() { + dim3 block = swizzle(); + return block.z; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/wmma_gemm_epilogue_traits.h b/cutlass/gemm/wmma_gemm_epilogue_traits.h index 0fafacf90..f35264dda 100644 --- a/cutlass/gemm/wmma_gemm_epilogue_traits.h +++ b/cutlass/gemm/wmma_gemm_epilogue_traits.h @@ -27,18 +27,18 @@ */ #pragma once -#include +#include 
"cutlass/wmma_matrix.h" #ifdef CUTLASS_USE_WMMA_API -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/convert.h" +#include "cutlass/coord.h" +#include "cutlass/gemm/gemm_global_stream.h" +#include "cutlass/gemm/gemm_shared_stream.h" +#include "cutlass/gemm/linear_scaling.h" +#include "cutlass/gemm/wmma_gemm_global_tile.h" +#include "cutlass/gemm/wmma_gemm_shared_tile.h" +#include "cutlass/reshape_tile.h" +#include "cutlass/tile_iterator.h" namespace cutlass { namespace gemm { @@ -89,7 +89,7 @@ struct WmmaGemmEpilogueTraitsHelper { MemorySpace::kShared, Index_, WmmaMatrix, - IteratorFragment::kWmmaMatrix> + FragmentElementType::kWmmaMatrix> SharedStoreIteratorD; /// The shared store transformer for D. @@ -114,6 +114,9 @@ struct WmmaGemmEpilogueTraitsHelper { MemorySpace::kShared> SharedLoadIteratorD; + /// The stream to load D. + typedef SharedLoadStream SharedLoadStreamD; + /// The traits class to build the iterator to load data from global memory for C^N. typedef WmmaGemmGlobalIteratorCdTraits< // The pointer is float const. diff --git a/cutlass/gemm/wmma_gemm_global_tile.h b/cutlass/gemm/wmma_gemm_global_tile.h index dbd57f6b5..ce369d0eb 100644 --- a/cutlass/gemm/wmma_gemm_global_tile.h +++ b/cutlass/gemm/wmma_gemm_global_tile.h @@ -27,7 +27,7 @@ */ #pragma once -#include +#include "cutlass/gemm/gemm_global_tile.h" namespace cutlass { namespace gemm { @@ -68,22 +68,13 @@ struct WmmaGemmGlobalIteratorCdTraits : public GemmGlobalTileTraits -struct WmmaGemmGlobalIteratorCd : public TileIteratorBase { +struct WmmaGemmGlobalIteratorCd : public GemmGlobalIteratorCd { /// This class. typedef WmmaGemmGlobalIteratorCd This_; /// The traits. typedef TileTraits_ Traits; /// The base class. - typedef TileIteratorBase - Base; + typedef GemmGlobalIteratorCd Base; /// Override the strides in each dimension between different loads/stores. 
typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> ImmediateOffsetStrides; /// The layout. @@ -99,47 +90,36 @@ struct WmmaGemmGlobalIteratorCd : public TileIteratorBasepointer = pointer; + BaseParams::pointer = pointer; + // Stride between GEMMs + BaseParams::stride_d = batch_stride; // Setup the base stride. One "group of threads" per column. - stride_h = ld; + BaseParams::stride_h = ldm; // Each thread output 1 column per iteration. . - inc_h = ld * TileTraits_::Threads::kH; - inc_advance = inc_h + epilogue_stride_w; + BaseParams::inc_h = ldm * TileTraits_::Threads::kH; + BaseParams::inc_advance = BaseParams::inc_h + epilogue_stride_w; - predicate_offset = n; - predicate_inc_h = TileTraits_::Threads::kH; - predicate_inc_advance = predicate_inc_h + epilogue_delta_w; + BaseParams::predicate_offset = n; + BaseParams::predicate_inc_h = TileTraits_::Threads::kH; + BaseParams::predicate_inc_advance = BaseParams::predicate_inc_h + epilogue_delta_w; - // It worked. return 0; } }; - Params params; - - Coord<4> thread_offset; - - /// Ctor. - CUTLASS_DEVICE WmmaGemmGlobalIteratorCd() {} - /// Ctor. CUTLASS_DEVICE WmmaGemmGlobalIteratorCd(Params const& params, const Coord<3>& bounds, @@ -148,61 +128,37 @@ struct WmmaGemmGlobalIteratorCd : public TileIteratorBaseparams.pointer += ((h * params.stride_h + w) + pointer_offset); + : Base(params, bounds, block, pointer_offset, pred_offset, thread_offset_func) {} - // Prepare the vector of predicates. - for (int i = 0; i < Base::Iterations::kW; ++i) { - predicates.set(i, w + i * Base::Delta::kW < bounds[2]); - } - this->params.predicate_offset -= (h + pred_offset); + /// Loads a single fragment element from memory + CUTLASS_DEVICE void load_element( + typename Base::AccessType& value, int d, int h, int w, int c) const { + Base::load_element(value, d, h, w, c); } - /// The accessor. 
- CUTLASS_DEVICE void get(typename Base::AccessType& value, int d, int h, int w, int c) const { - int const imm = - ComputeOffsetFromStrides::get(0, 0, w, c); - Load::load(value, params.pointer, imm); - } - - /// Increment the pointer in the C dimension. - CUTLASS_DEVICE void inc_c() {} - /// Increment the pointer in the W dimension. - CUTLASS_DEVICE void inc_w() {} - /// Increment the pointer in the H dimension. - CUTLASS_DEVICE void inc_h() { - params.pointer += params.inc_h; - params.predicate_offset -= params.predicate_inc_h; - } - /// Increment the pointer in the D dimension. - CUTLASS_DEVICE void inc_d() {} - /// Increment the pointer to move to the next iteration. - CUTLASS_DEVICE void inc_advance() { - params.pointer += params.inc_advance; - params.predicate_offset -= params.predicate_inc_advance; - } - - /// The accessor. - CUTLASS_DEVICE void set(typename Base::AccessType const& value, int d, int h, int w, int c) { - int const imm = + /// Stores a single fragment element into memory + CUTLASS_DEVICE void store_element( + typename Base::AccessType const& value, int d, int h, int w, int c) { + int const offset = ComputeOffsetFromStrides::get(d, h, w, 0); - Store::store( - value, params.pointer, imm); + Store::store(value, Base::params.pointer, offset); } - /// Test the predicate. - CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { - return predicates.at(w) && params.predicate_offset > 0; + public: + template + CUTLASS_DEVICE void load_post_increment(Fragment& fragment) { + Base::load_post_increment(fragment); } - /// The predicates for the row. 
- cutlass::PredicateVector predicates; + template + CUTLASS_DEVICE void store_post_increment(Fragment& fragment) { + Base::store_post_increment(fragment); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/gemm/wmma_gemm_multiply_add.h b/cutlass/gemm/wmma_gemm_multiply_add.h index 5968350e0..328e43adb 100644 --- a/cutlass/gemm/wmma_gemm_multiply_add.h +++ b/cutlass/gemm/wmma_gemm_multiply_add.h @@ -27,9 +27,9 @@ */ #pragma once -#include +#include "cutlass/wmma_matrix.h" #ifdef CUTLASS_USE_WMMA_API -#include +#include "cutlass/fragment.h" namespace cutlass { namespace gemm { @@ -42,15 +42,17 @@ template struct WmmaGemmMultiplyAdd { /// The shape of the instruction. typedef InstructionShape_ InstructionShape; /// The number of threads per warp. That's a dummy configuration. typedef Shape<1, InstructionShape_::kH, InstructionShape_::kW> ThreadsPerWarp; - /// The dimensions. - typedef AccumulatorsPerWarp_ AccumulatorsPerWarp; + /// Dimensions of the warp-level GEMM (K-by-N-by-M) + typedef WarpGemmShape_ WarpGemmShape; + /// Aliased for compatibility. Will be removed in CUTLASS v2.0 + typedef WarpGemmShape_ AccumulatorsPerWarp; /// The type for A. typedef ScalarA_ ScalarA; /// The type for B. @@ -102,6 +104,251 @@ struct WmmaGemmMultiplyAdd { //////////////////////////////////////////////////////////////////////////////////////////////////// +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with binary operands +template +struct WmmaGemmMultiplyAdd , + MatrixLayout::kColumnMajor, + Vector, + MatrixLayout::kColumnMajor, + int, + WarpGemmShape_, + Shape<128, 8, 8> >{ + /// The shape of the instruction. + typedef Shape<128, 8, 8> InstructionShape; + /// The number of threads per warp. That's a dummy configuration. + typedef Shape<1, 4, 8> ThreadsPerWarp; + /// Dimensions of the warp-level GEMM (K-by-N-by-M) + typedef WarpGemmShape_ WarpGemmShape; + /// Aliased for compatibility. 
Will be removed in CUTLASS v2.0 + typedef WarpGemmShape_ AccumulatorsPerWarp; + /// The type for A. + typedef Vector ScalarA; + /// The type for B. + typedef Vector ScalarB; + /// The type for C and D. + typedef int ScalarC; + /// The number of iterations. + typedef typename ShapeDiv::Shape Iterations; + + /// The element for A. + typedef WmmaMatrix, + InstructionShape> ElementA; + /// The fragment for A. + typedef Fragment FragmentA; + + /// The element for B. + typedef WmmaMatrix, + InstructionShape> ElementB; + /// The fragment for B. + typedef Fragment FragmentB; + + /// The element for C. + typedef WmmaMatrix ElementC; + /// The fragment for C. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE WmmaGemmMultiplyAdd() {} + + /// Multiply : d = a*b. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < Iterations::kH; ++j) { + for (int i = 0; i < Iterations::kW; ++i) { + // The input elements. + ElementA const& elt_a = a[i]; + ElementB const& elt_b = b[j]; + ElementC const& elt_c = c[j * Iterations::kW + i]; + + // The output element. + ElementC& elt_d = d[j * Iterations::kW + i]; + + // The wmma instruction. + nvcuda::wmma::bmma_sync(elt_d, + elt_a, + elt_b, + elt_c, + nvcuda::wmma::experimental::bmmaBitOpXOR, + nvcuda::wmma::experimental::bmmaAccumulateOpPOPC); + } + } + } +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with signed 4-bit integer operands +template +struct WmmaGemmMultiplyAdd , + MatrixLayout::kColumnMajor, + Vector, + MatrixLayout::kColumnMajor, + int, + WarpGemmShape_, + Shape<32, 8, 8> >{ + /// The shape of the instruction. + typedef Shape<32, 8, 8> InstructionShape; + /// The number of threads per warp. That's a dummy configuration. 
+ typedef Shape<1, 4, 8> ThreadsPerWarp; + /// Dimensions of the warp-level GEMM (K-by-N-by-M) + typedef WarpGemmShape_ WarpGemmShape; + /// Aliased for compatibility. Will be removed in CUTLASS v2.0 + typedef WarpGemmShape_ AccumulatorsPerWarp; + /// The type for A. + typedef Vector ScalarA; + /// The type for B. + typedef Vector ScalarB; + /// The type for C and D. + typedef int ScalarC; + /// The number of iterations. + typedef typename ShapeDiv::Shape Iterations; + + /// The element for A. + typedef WmmaMatrix, + InstructionShape> ElementA; + /// The fragment for A. + typedef Fragment FragmentA; + + /// The element for B. + typedef WmmaMatrix, + InstructionShape> ElementB; + /// The fragment for B. + typedef Fragment FragmentB; + + /// The element for C. + typedef WmmaMatrix ElementC; + /// The fragment for C. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE WmmaGemmMultiplyAdd() {} + + /// Multiply : d = a*b. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < Iterations::kH; ++j) { + for (int i = 0; i < Iterations::kW; ++i) { + // The input elements. + ElementA const& elt_a = a[i]; + ElementB const& elt_b = b[j]; + ElementC const& elt_c = c[j * Iterations::kW + i]; + + // The output element. + ElementC& elt_d = d[j * Iterations::kW + i]; + + // The wmma instruction. + nvcuda::wmma::mma_sync(elt_d, elt_a, elt_b, elt_c); + } + } + } +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with unsigned 4-bit integer operands +template +struct WmmaGemmMultiplyAdd , + MatrixLayout::kColumnMajor, + Vector, + MatrixLayout::kColumnMajor, + int, + WarpGemmShape_, + Shape<32, 8, 8> >{ + /// The shape of the instruction. + typedef Shape<32, 8, 8> InstructionShape; + /// The number of threads per warp. That's a dummy configuration. 
+ typedef Shape<1, 4, 8> ThreadsPerWarp; + /// Dimensions of the warp-level GEMM (K-by-N-by-M) + typedef WarpGemmShape_ WarpGemmShape; + /// Aliased for compatibility. Will be removed in CUTLASS v2.0 + typedef WarpGemmShape_ AccumulatorsPerWarp; + /// The type for A. + typedef Vector ScalarA; + /// The type for B. + typedef Vector ScalarB; + /// The type for C and D. + typedef int ScalarC; + /// The number of iterations. + typedef typename ShapeDiv::Shape Iterations; + + /// The element for A. + typedef WmmaMatrix, + InstructionShape> ElementA; + /// The fragment for A. + typedef Fragment FragmentA; + + /// The element for B. + typedef WmmaMatrix, + InstructionShape> ElementB; + /// The fragment for B. + typedef Fragment FragmentB; + + /// The element for C. + typedef WmmaMatrix ElementC; + /// The fragment for C. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE WmmaGemmMultiplyAdd() {} + + /// Multiply : d = a*b. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < Iterations::kH; ++j) { + for (int i = 0; i < Iterations::kW; ++i) { + // The input elements. + ElementA const& elt_a = a[i]; + ElementB const& elt_b = b[j]; + ElementC const& elt_c = c[j * Iterations::kW + i]; + + // The output element. + ElementC& elt_d = d[j * Iterations::kW + i]; + + // The wmma instruction. 
+ nvcuda::wmma::mma_sync(elt_d, elt_a, elt_b, elt_c); + } + } + } +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + } // namespace gemm } // namespace cutlass diff --git a/cutlass/gemm/wmma_gemm_shared_tile.h b/cutlass/gemm/wmma_gemm_shared_tile.h index 7d15b260f..1a90e2f10 100644 --- a/cutlass/gemm/wmma_gemm_shared_tile.h +++ b/cutlass/gemm/wmma_gemm_shared_tile.h @@ -28,18 +28,15 @@ */ #pragma once -#include +#include "cutlass/wmma_matrix.h" #ifdef CUTLASS_USE_WMMA_API -#include -#include +#include "cutlass/gemm/gemm_operand.h" +#include "cutlass/reshape_tile.h" namespace cutlass { namespace gemm { -template -struct Debug {}; - //////////////////////////////////////////////////////////////////////////////////////////////////// template +#include "cutlass/wmma_matrix.h" #ifdef CUTLASS_USE_WMMA_API -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "cutlass/convert.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/gemm_epilogue.h" +#include "cutlass/gemm/gemm_epilogue_traits.h" +#include "cutlass/gemm/gemm_global_tile.h" +#include "cutlass/gemm/gemm_shared_tile.h" +#include "cutlass/gemm/gemm_traits.h" +#include "cutlass/gemm/wmma_gemm_epilogue_traits.h" +#include "cutlass/gemm/wmma_gemm_global_tile.h" +#include "cutlass/gemm/wmma_gemm_multiply_add.h" namespace cutlass { namespace gemm { @@ -53,12 +53,16 @@ template < MatrixLayout::Kind kLayoutB_, /// The tile size for the GEMM KxNxM. typename OutputTile_, + /// The input type. + typename ScalarA_, + /// The input type. + typename ScalarB_, /// The output type. typename ScalarC_, /// The accumulator type. typename Accumulator_, - /// The number of accumulators per warp. - typename AccumulatorsPerWarp_, + /// Tile size for warp-level GEMM (K-by-N-by-M) + typename WarpGemmShape_, /// The shape of the WMMA instruction. 
typename InstructionShape_, /// The number of scalars per LDG for A. @@ -67,9 +71,9 @@ template < int kScalarsPerLdgB_> struct WmmaGemmConfig : public GemmConfig< /// The scalar type for A. - half, + ScalarA_, /// The scalar type for B. - half, + ScalarB_, /// The scalar type for C. ScalarC_, /// The scalar type for D. @@ -78,12 +82,12 @@ struct WmmaGemmConfig : public GemmConfig< OutputTile_, /// The functor to do the math in the main loop. WmmaGemmMultiplyAdd, /// The number of scalars per LDG for A. kScalarsPerLdgA_, @@ -100,21 +104,29 @@ struct WmmaGemmConfig : public GemmConfig< /// The number of scalars per LDG for C and STG for D. 16 / sizeof(ScalarC_), /// The number of scalars per STS for D. - 16 / sizeof(ScalarC_), + 16 / sizeof(Accumulator_), /// The number of scalars per LDS for D. - 16 / sizeof(ScalarC_), + 16 / sizeof(Accumulator_), /// The number of stages in shared memory. - 1> {}; + 1, + /// If true, residue is computed in mainloop. If false, separate loops are instantiated. + false, + /// Is residue performed in prologue? + true, + /// If true, kernel is launched with CUDA launch bounds specified + false> {}; //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template struct WmmaGemmTileTraitsHelperA {}; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct WmmaGemmTileTraitsHelperA +template +struct WmmaGemmTileTraitsHelperA : public GemmTileTraitsHelperA { /// The base config. typedef GemmTileTraitsHelperA Base; @@ -173,8 +185,8 @@ struct WmmaGemmTileTraitsHelperA //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct WmmaGemmTileTraitsHelperA { +template +struct WmmaGemmTileTraitsHelperA { /// The layout. 
static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; @@ -251,13 +263,276 @@ struct WmmaGemmTileTraitsHelperA { //////////////////////////////////////////////////////////////////////////////////////////////////// -template +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with binary operands +template +struct WmmaGemmTileTraitsHelperA > { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// GemmConfig_::OutputTile::kD is in number of 'bits'. TileTraits expects number of 'Scalar'. + /// Divide by 'kBitsPerScalar' to get the number in 'Scalar'. + static int const kBitsPerScalar = sizeof(Scalar) * 8; + + /// WMMA matrix + typedef WmmaMatrix, + typename GemmConfig_::InstructionShape> + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD / kBitsPerScalar>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, + GemmConfig_::kThreads / (GemmConfig_::OutputTile::kD / kBitsPerScalar), + GemmConfig_::OutputTile::kD / kBitsPerScalar>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA / kBitsPerScalar> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. 
+ MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsA / kBitsPerScalar> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW; + /// The traits class to build the iterator to load from shared memory for A. + typedef WmmaGemmSharedLoadTileATraits< + // The layout of the matrix. + MatrixLayout::kRowMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kW * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with unsigned 4-bit integer operands +template +struct WmmaGemmTileTraitsHelperA > { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// GemmConfig_::OutputTile::kD is in number of 'int4'. TileTraits expects number of 'Scalar'. + /// Divide by 'kInt4PerScalar' to get the number in 'Scalar'. 
+ static int const kInt4PerScalar = sizeof(Scalar) * 2; + + /// WMMA matrix + typedef WmmaMatrix, + typename GemmConfig_::InstructionShape> + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, + GemmConfig_::kThreads / (GemmConfig_::OutputTile::kD / kInt4PerScalar), + GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA / kInt4PerScalar> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsA / kInt4PerScalar> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW; + /// The traits class to build the iterator to load from shared memory for A. + typedef WmmaGemmSharedLoadTileATraits< + // The layout of the matrix. + MatrixLayout::kRowMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. 
+ GemmConfig_::InstructionShape::kW * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with signed 4-bit integer operands +template +struct WmmaGemmTileTraitsHelperA > { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// GemmConfig_::OutputTile::kD is in number of 'int4'. TileTraits expects number of 'Scalar'. + /// Divide by 'kInt4PerScalar' to get the number in 'Scalar'. + static int const kInt4PerScalar = sizeof(Scalar) * 2; + + /// WMMA matrix + typedef WmmaMatrix, + typename GemmConfig_::InstructionShape> + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, + GemmConfig_::kThreads / (GemmConfig_::OutputTile::kD / kInt4PerScalar), + GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA / kInt4PerScalar> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. 
+ typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsA / kInt4PerScalar> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW; + /// The traits class to build the iterator to load from shared memory for A. + typedef WmmaGemmSharedLoadTileATraits< + // The layout of the matrix. + MatrixLayout::kRowMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kW * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template struct WmmaGemmTileTraitsHelperB {}; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct WmmaGemmTileTraitsHelperB +template +struct WmmaGemmTileTraitsHelperB : public GemmTileTraitsHelperB { /// The base config. typedef GemmTileTraitsHelperB Base; @@ -316,8 +591,8 @@ struct WmmaGemmTileTraitsHelperB //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct WmmaGemmTileTraitsHelperB { +template +struct WmmaGemmTileTraitsHelperB { /// The layout. 
static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; @@ -394,6 +669,267 @@ struct WmmaGemmTileTraitsHelperB { //////////////////////////////////////////////////////////////////////////////////////////////////// +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with binary operands +template +struct WmmaGemmTileTraitsHelperB > { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// GemmConfig_::OutputTile::kD is in number of 'bits'. TileTraits expects number of 'Scalar'. + /// Divide by 'kBitsPerScalar' to get the number in 'Scalar'. + static int const kBitsPerScalar = sizeof(Scalar) * 8; + + /// WMMA matrix + typedef WmmaMatrix, + typename GemmConfig_::InstructionShape> + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // A is row-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD / kBitsPerScalar>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, + GemmConfig_::kThreads / (GemmConfig_::OutputTile::kD / kBitsPerScalar), + GemmConfig_::OutputTile::kD / kBitsPerScalar>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB / kBitsPerScalar> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. 
+ MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB / kBitsPerScalar> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH; + /// The traits class to build the iterator to load from shared memory for B. + typedef WmmaGemmSharedLoadTileBTraits< + // The layout of the matrix. + MatrixLayout::kColumnMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kH * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with unsigned 4-bit integer operands +template +struct WmmaGemmTileTraitsHelperB > { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// GemmConfig_::OutputTile::kD is in number of 'int4'. TileTraits expects number of 'Scalar'. + /// Divide by 'kInt4PerScalar' to get the number in 'Scalar'. 
+ static int const kInt4PerScalar = sizeof(Scalar) * 2; + + /// WMMA matrix + typedef WmmaMatrix, + typename GemmConfig_::InstructionShape> + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // A is row-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, + GemmConfig_::kThreads / (GemmConfig_::OutputTile::kD / kInt4PerScalar), + GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB / kInt4PerScalar> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB / kInt4PerScalar> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH; + /// The traits class to build the iterator to load from shared memory for B. + typedef WmmaGemmSharedLoadTileBTraits< + // The layout of the matrix. + MatrixLayout::kColumnMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. 
+ GemmConfig_::InstructionShape::kH * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Specialization for WMMA GEMM with signed 4-bit integer operands +template +struct WmmaGemmTileTraitsHelperB > { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// GemmConfig_::OutputTile::kD is in number of 'int4'. TileTraits expects number of 'Scalar'. + /// Divide by 'kInt4PerScalar' to get the number in 'Scalar'. + static int const kInt4PerScalar = sizeof(Scalar) * 2; + + /// WMMA matrix + typedef WmmaMatrix, + typename GemmConfig_::InstructionShape> + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // A is row-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, + GemmConfig_::kThreads / (GemmConfig_::OutputTile::kD / kInt4PerScalar), + GemmConfig_::OutputTile::kD / kInt4PerScalar>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB / kInt4PerScalar> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. 
+ typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB / kInt4PerScalar> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH; + /// The traits class to build the iterator to load from shared memory for B. + typedef WmmaGemmSharedLoadTileBTraits< + // The layout of the matrix. + MatrixLayout::kColumnMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kH * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + template < /// The layout for A. MatrixLayout::Kind kLayoutA_, @@ -401,14 +937,18 @@ template < MatrixLayout::Kind kLayoutB_, /// The output tile. typename OutputTile_, + /// The input type. + typename ScalarA_, + /// The input type. + typename ScalarB_, /// The output type. typename ScalarC_, /// The accumulator type. typename Accumulator_, /// The functor to do the math in the epilogue. typename EpilogueFunctor_, - /// The number of accumulators per warp. 
- typename AccumulatorsPerWarp_, + /// Tile size for warp-level GEMM (K-by-N-by-M) + typename WarpGemmShape_, /// The shape of the WMMA instruction. typename InstructionShape_, /// The number of halfs loaded in one LDG for A. @@ -422,18 +962,20 @@ struct WmmaGemmTraitsHelper { typedef WmmaGemmConfig GemmConfig; /// The GEMM config for A. - typedef WmmaGemmTileTraitsHelperA GemmTileTraitsHelperA; + typedef WmmaGemmTileTraitsHelperA GemmTileTraitsHelperA; /// The GEMM config for B. - typedef WmmaGemmTileTraitsHelperB GemmTileTraitsHelperB; + typedef WmmaGemmTileTraitsHelperB GemmTileTraitsHelperB; /// The iterator to load A from global memory. typedef GemmGlobalIteratorAb @@ -447,7 +989,10 @@ struct WmmaGemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorA; /// The stream to load A from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamA; /// The iterator to load B from global memory. @@ -462,7 +1007,10 @@ struct WmmaGemmTraitsHelper { MemorySpace::kShared> SharedStoreIteratorB; /// The stream to load B from global memory to shared memory. - typedef GlobalLoadStream + typedef GlobalLoadStream GlobalLoadStreamB; /// The iterator to load A from shared memory. @@ -472,7 +1020,7 @@ struct WmmaGemmTraitsHelper { MemorySpace::kShared, Index_, typename GemmTileTraitsHelperA::WmmaMatrix, - IteratorFragment::kWmmaMatrix> + FragmentElementType::kWmmaMatrix> SharedLoadIteratorA; /// The stream to load A from shared memory. typedef SharedLoadStream SharedLoadStreamA; @@ -483,7 +1031,7 @@ struct WmmaGemmTraitsHelper { MemorySpace::kShared, Index_, typename GemmTileTraitsHelperB::WmmaMatrix, - IteratorFragment::kWmmaMatrix> + FragmentElementType::kWmmaMatrix> SharedLoadIteratorB; /// The stream to load B from shared memory. typedef SharedLoadStream SharedLoadStreamB; @@ -518,14 +1066,18 @@ template < MatrixLayout::Kind kLayoutB_, /// The tile size for the GEMM KxNxM. 
typename OutputTile_ = Shape<64, 128, 128>, + /// The input type. + typename ScalarA_ = half, + /// The input type. + typename ScalarB_ = half, /// The output type. typename ScalarC_ = float, /// The functor to do the math in the epilogue. typename EpilogueFunctor_ = LinearScaling, /// The accumulator type. typename Accumulator_ = ScalarC_, - /// The number of accumulators per warp. - typename AccumulatorsPerWarp_ = typename WmmaGemmAccumulatorsPerWarp::Shape, + /// Tile size for warp-level GEMM (K-by-N-by-M) + typename WarpGemmShape_ = typename WmmaGemmAccumulatorsPerWarp::Shape, /// The shape of the WMMA instruction. typename InstructionShape_ = Shape<16, 16, 16>, /// The number of scalars per LDG for A. @@ -538,10 +1090,12 @@ template < typename Helper_ = WmmaGemmTraitsHelper -#include -#include -#include +#include "cutlass/load_store.h" +#include "cutlass/predicate_vector.h" +#include "cutlass/shape.h" namespace cutlass { /////////////////////////////////////////////////////////////////////////////////////////////////// - -/// Loads a fragment from an input iterator +// Used by convolution template CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) { typename InputIterator::FragmentIterator frag_iterator(fragment); @@ -45,12 +43,12 @@ CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragme for (int w = 0; w < InputIterator::Iterations::kW; ++w) { for (int c = 0; c < InputIterator::Iterations::kC; ++c) { if (iterator.valid(d, h, w, c)) { - iterator.get(reinterpret_cast( - frag_iterator.at(d, h, w, c)), - d, - h, - w, - c); + iterator.load_element(reinterpret_cast( + frag_iterator.at(d, h, w, c)), + d, + h, + w, + c); } } if (w < InputIterator::Iterations::kW - 1) { @@ -68,138 +66,21 @@ CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragme iterator.inc_advance(); } -/// Loads a fragment from a shared memory input iterator -template -CUTLASS_DEVICE void 
shared_iterator_load(InputIterator &iterator, Fragment &fragment) { - typename InputIterator::FragmentIterator frag_iterator(fragment); - for (int d = 0; d < InputIterator::Iterations::kD; ++d) { - for (int h = 0; h < InputIterator::Iterations::kH; ++h) { - for (int w = 0; w < InputIterator::Iterations::kW; ++w) { - for (int c = 0; c < InputIterator::Iterations::kC; ++c) { - int const offset = - ComputeOffsetFromStrides::get( - d, h, w, c); - - FragmentLoad::load(frag_iterator.at(d, h, w, c), - iterator.data(), - offset); - } - } - } - } -} - -/// Loads a fragment from a shared memory input iterator -template -CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d) { - typename InputIterator::FragmentIterator frag_iterator(fragment); - for (int h = 0; h < InputIterator::Iterations::kH; ++h) { - for (int w = 0; w < InputIterator::Iterations::kW; ++w) { - for (int c = 0; c < InputIterator::Iterations::kC; ++c) { - int const offset = - ComputeOffsetFromStrides::get( - d, h, w, c); - - FragmentLoad::load(frag_iterator.at(0, h, w, c), - iterator.data(), - offset); - } - } - } -} - -/// Loads a fragment from an input iterator, masked by a predicate iterator -template -CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, - Fragment &fragment, - typename InputIterator::Index offset, - ConstPredicateAdapter predicate_adapter) { - for (int d = 0; d < InputIterator::Iterations::kD; ++d, iterator.inc_d()) { - for (int h = 0; h < InputIterator::Iterations::kH; ++h, iterator.inc_h()) { - for (int w = 0; w < InputIterator::Iterations::kW; ++w, iterator.inc_w()) { - if (predicate_adapter.at(d, h, w, 0)) { - int idx = InputIterator::Tile::kC * - (w + InputIterator::Iterations::kW * (h + InputIterator::Iterations::kH * d)); - - Load:: - load(reinterpret_cast(fragment[idx]), - iterator.data(), - offset); - } - } - } - } -} - -/// Loads a fragment from an input iterator -template -CUTLASS_HOST_DEVICE void 
iterator_load_post_increment(InputIterator &iterator, - Fragment &fragment, - typename InputIterator::Index offset = 0) { - TrivialPredicateTileAdapter pred; - iterator_load_post_increment(iterator, fragment, offset, pred); -} - -/// Loads a fragment from an input iterator -template -CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, - Fragment &fragment, - ConstPredicateAdapter pred_it) { - iterator_load_post_increment(iterator, fragment, 0, pred_it); -} - -template -CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &_iterator, - Fragment &fragment, - typename InputIterator::Index offset, - ConstPredicateAdapter predicate_adapter) { - InputIterator iterator(_iterator); - iterator_load_post_increment(iterator, fragment, offset, predicate_adapter); -} - -/// Loads a fragment from an input iterator -template -CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator, - Fragment &fragment, - typename InputIterator::Index offset = 0) { - TrivialPredicateTileAdapter pred; - iterator_load(iterator, fragment, offset, pred); -} - -/// Loads a fragment from an input iterator -template -CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator, - Fragment &fragment, - ConstPredicateAdapter pred_it) { - iterator_load(iterator, fragment, 0, pred_it); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -/// Stores a fragment to an output iterator template CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) { typename OutputIterator::FragmentIterator frag_iterator(fragment); for (int d = 0; d < OutputIterator::Iterations::kD; ++d) { for (int h = 0; h < OutputIterator::Iterations::kH; ++h) { for (int w = 0; w < OutputIterator::Iterations::kW; ++w) { - if (iterator.valid(d, h, w, 0)) { - iterator.set(reinterpret_cast( - frag_iterator.at(d, h, w, 0)), - d, - h, - w, - 0); + for (int c = 0; c < OutputIterator::Iterations::kC; ++c) { + 
if (iterator.valid(d, h, w, c)) { + iterator.store_element(reinterpret_cast( + frag_iterator.at(d, h, w, c)), + d, + h, + w, + c); + } } if (w < OutputIterator::Iterations::kW - 1) { iterator.inc_w(); @@ -215,104 +96,6 @@ CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &frag } iterator.inc_advance(); } - -/// Stores a fragment to a shared memory output iterator -template -CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment) { - typename OutputIterator::FragmentConstIterator frag_iterator(fragment); - for (int d = 0; d < OutputIterator::Iterations::kD; ++d) { - for (int h = 0; h < OutputIterator::Iterations::kH; ++h) { - for (int w = 0; w < OutputIterator::Iterations::kW; ++w) { - for (int c = 0; c < OutputIterator::Iterations::kC; ++c) { - int const offset = - ComputeOffsetFromStrides::get( - d, h, w, c); - - FragmentStore::store(frag_iterator.at(d, h, w, c), - iterator.data(), - offset); - } - } - } - } -} - //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Stores a fragment to an output iterator, masked by a predicate iterator -template -CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, - Fragment const &fragment, - typename OutputIterator::Index offset, - ConstPredicateAdapter predicate_adapter) { - for (int d = 0; d < OutputIterator::Iterations::kD; ++d, iterator.inc_d()) { - for (int h = 0; h < OutputIterator::Iterations::kH; ++h, iterator.inc_h()) { - for (int w = 0; w < OutputIterator::Iterations::kW; ++w, iterator.inc_w()) { - if (predicate_adapter.at(d, h, w, 0)) { - int idx = OutputIterator::Tile::kC * - (w + OutputIterator::Iterations::kW * (h + OutputIterator::Iterations::kH * d)); - - Store:: - store(reinterpret_cast(fragment[idx]), - iterator.data(), - offset); - } - } - } - } -} - -/// Stores a fragment to an output iterator -template -CUTLASS_HOST_DEVICE void 
iterator_store_post_increment(OutputIterator &iterator, - Fragment const &fragment, - typename OutputIterator::Index offset = 0) { - TrivialPredicateTileAdapter pred; - iterator_store_post_increment(iterator, fragment, offset, pred); -} - -/// Stores a fragment to an output iterator -template -CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, - Fragment const &fragment, - ConstPredicateAdapter pred_it) { - iterator_store_post_increment(iterator, fragment, 0, pred_it); -} - -/// Stores a fragment to an output iterator, masked by a predicate iterator -template -CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &_iterator, - Fragment const &fragment, - typename OutputIterator::Index offset, - ConstPredicateAdapter predicate_adapter) { - OutputIterator iterator(_iterator); - iterator_store_post_increment(iterator, fragment, offset, predicate_adapter); -} - -/// Stores a fragment to an output iterator -template -CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator, - Fragment const &fragment, - typename OutputIterator::Index offset = 0) { - TrivialPredicateTileAdapter pred; - iterator_store(iterator, fragment, offset, pred); -} - -/// Stores a fragment to an output iterator -template -CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator, - Fragment const &fragment, - ConstPredicateAdapter pred_it) { - iterator_store(iterator, fragment, 0, pred_it); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - } // namespace cutlass diff --git a/cutlass/kernel_launch.h b/cutlass/kernel_launch.h new file mode 100644 index 000000000..ee37b2fda --- /dev/null +++ b/cutlass/kernel_launch.h @@ -0,0 +1,67 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structures and helpers to launch CUDA kernels within CUTLASS. +*/ + +#pragma once + +#include "cutlass/cutlass.h" + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Structure containing the basic launch configuration of a CUDA kernel. 
+struct KernelLaunchConfiguration { + + /// CUDA grid dimensions + dim3 grid; + + /// CUDA threablock dimensions + dim3 block; + + /// Bytes of dynamically allocated SMEM in addition to static SMEM + size_t dynamic_smem; + + // + // Methods + // + + /// Constructs a KernellaunchConfiguration object + CUTLASS_HOST_DEVICE + KernelLaunchConfiguration( + dim3 _grid = dim3(1,1,1), + dim3 _block = dim3(1,1,1), + size_t _dynamic_smem = 0 + ): + grid(_grid), + block(_block), + dynamic_smem(_dynamic_smem) { } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/load_store.h b/cutlass/load_store.h index 5cb5eb672..db09dd0a4 100644 --- a/cutlass/load_store.h +++ b/cutlass/load_store.h @@ -27,8 +27,7 @@ */ #pragma once -#include - +#include "cutlass/vector.h" namespace cutlass { //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -44,45 +43,68 @@ struct MemorySpace { }; }; +/// Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix +struct FragmentElementType { + enum Kind { kScalar, kWmmaMatrix }; +}; + //////////////////////////////////////////////////////////////////////////////////////////////////// template 1), - size_t = (sizeof(Scalar_) * Lanes_)> + FragmentElementType::Kind kFragmentElementType = FragmentElementType::kScalar, + typename FragmentElement_ = Scalar_, + int kStride = 1, + size_t size = (sizeof(Scalar_) * kAccessSize)> struct Load { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; /// The load function. 
- static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { - dst = reinterpret_cast(&pointer[offset])[0]; + static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + dst = *reinterpret_cast(pointer + offset); + } + +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Partial specialization for 16b loads +template +struct Load { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The load function. + static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + reinterpret_cast(dst) = reinterpret_cast(&pointer[offset])[0]; } }; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Load { +template +struct Load { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; - /// The store function. - static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + /// The load function. + static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { dst.registers[0] = reinterpret_cast(&pointer[offset])[0]; } + }; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Load { +template +struct Load { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; - /// The store function. - static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + /// The load function. 
+ static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { uint2 tmp = reinterpret_cast(&pointer[offset])[0]; dst.registers[0] = tmp.x; dst.registers[1] = tmp.y; @@ -91,13 +113,13 @@ struct Load { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Load { +template +struct Load { /// The output type. typedef typename Vectorize::Type AccessType; - /// The store function. - static CUTLASS_DEVICE void load(AccessType& dst, double const* pointer, int offset) { + /// The load function. + static CUTLASS_HOST_DEVICE void load(AccessType& dst, double const* pointer, int offset) { double2 tmp = reinterpret_cast(&pointer[offset])[0]; dst[0] = tmp.x; dst[1] = tmp.y; @@ -108,13 +130,13 @@ struct Load { #if defined(__CUDACC_VERSION_MAJOR) && __CUDACC_VERSION_MAJOR < 10 // WAR bug in NVCC where the upper and lower half of the register end up being the same -template -struct Load { +template +struct Load { /// The output type. typedef typename Vectorize::Type AccessType; - /// The store function. - static CUTLASS_DEVICE void load(AccessType& dst, half const* pointer, int offset) { + /// The load function. + static CUTLASS_HOST_DEVICE void load(AccessType& dst, half const* pointer, int offset) { int2 tmp = reinterpret_cast(&pointer[offset])[0]; dst.registers[0] = tmp.x; dst.registers[1] = tmp.y; @@ -129,13 +151,13 @@ struct Load { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Load { +template +struct Load { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; - /// The store function. - static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + /// The load function. 
+ static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { uint4 tmp = reinterpret_cast(&pointer[offset])[0]; dst.registers[0] = tmp.x; dst.registers[1] = tmp.y; @@ -147,29 +169,45 @@ struct Load { //////////////////////////////////////////////////////////////////////////////////////////////////// template 1), - size_t = (sizeof(Scalar_) * Lanes_)> + FragmentElementType::Kind kFragmentElementType = FragmentElementType::kScalar, + typename FragmentElement_ = Scalar_, + int kStride = 1, + size_t size = (sizeof(Scalar_) * kAccessSize)> struct Store { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; /// The store function. - static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { - pointer[offset] = src; + static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + pointer[offset] = *reinterpret_cast(&src); } }; //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Store { +template +struct Store { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; /// The store function. - static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + uint16_t* addr = reinterpret_cast(&pointer[offset]); + addr[0] = reinterpret_cast(src); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Store { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The store function. 
+ static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { uint32_t* addr = reinterpret_cast(&pointer[offset]); addr[0] = src.registers[0]; } @@ -177,13 +215,13 @@ struct Store { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Store { +template +struct Store { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; /// The store function. - static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { uint2* addr = reinterpret_cast(&pointer[offset]); addr[0] = make_uint2(src.registers[0], src.registers[1]); } @@ -191,13 +229,13 @@ struct Store { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Store { +template +struct Store { /// The output type. typedef typename Vectorize::Type AccessType; /// The store function. - static CUTLASS_DEVICE void store(AccessType const& src, double* pointer, int offset) { + static CUTLASS_HOST_DEVICE void store(AccessType const& src, double* pointer, int offset) { double2* addr = reinterpret_cast(&pointer[offset]); addr[0] = make_double2(src[0], src[1]); } @@ -205,13 +243,13 @@ struct Store { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Store { +template +struct Store { /// The output type. - typedef typename Vectorize::Type AccessType; + typedef typename Vectorize::Type AccessType; /// The store function. 
- static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { uint4* addr = reinterpret_cast(&pointer[offset]); addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]); } @@ -219,4 +257,123 @@ struct Store { //////////////////////////////////////////////////////////////////////////////////////////////////// +template +struct Load { + /// The output type. + typedef FragmentElement_ AccessType; + + /// The load function. + static CUTLASS_HOST_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) { + value.load(&pointer[offset], kStride); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Load, + kAccessSize, + Memory_, + FragmentElementType::kWmmaMatrix, + FragmentElement_, + kStride, + size> { + /// The output type. + typedef FragmentElement_ AccessType; + + /// The load function. + static CUTLASS_HOST_DEVICE void load(AccessType& value, Vector const* pointer, + int offset) { + value.load(&pointer[offset], kStride * 32); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Load, + kAccessSize, + Memory_, + FragmentElementType::kWmmaMatrix, + FragmentElement_, + kStride, + size> { + /// The output type. + typedef FragmentElement_ AccessType; + + /// The load function. + static CUTLASS_HOST_DEVICE void load(AccessType& value, Vector const* pointer, + int offset) { + value.load(&pointer[offset], kStride * 8); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Load, + kAccessSize, + Memory_, + FragmentElementType::kWmmaMatrix, + FragmentElement_, + kStride, + size> { + /// The output type. + typedef FragmentElement_ AccessType; + + /// The load function. 
+ static CUTLASS_HOST_DEVICE void load(AccessType& value, Vector const* pointer, + int offset) { + value.load(&pointer[offset], kStride * 8); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +template +struct Store { + /// The input type. + typedef FragmentElement_ AccessType; + + /// The store function. + static CUTLASS_HOST_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) { + value.store(&pointer[offset], kStride); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + } // namespace cutlass diff --git a/cutlass/matrix_traits.h b/cutlass/matrix_traits.h index 77e8b7062..08a43a99a 100644 --- a/cutlass/matrix_traits.h +++ b/cutlass/matrix_traits.h @@ -27,13 +27,327 @@ */ #pragma once +#include "cutlass/coord.h" + namespace cutlass { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Describes layouts of matrices +/// MatrixCoord wraps Coord<2, int> to provide a helper for accessing named dimensions. Classes +/// expecting a coordinate in the rank=2 index space of a matrix should use MatrixCoord. 
+struct MatrixCoord : public Coord<2, int> { + + /// Integer-valued index + typedef int Index; + + /// Base type is a Coord of rank=2 + typedef Coord<2, Index> Base; + + /// Rows dimension + static int const kRow = 0; + + /// Columns dimension + static int const kColumn = 1; + + // + // Methods + // + + /// Default ctor + CUTLASS_HOST_DEVICE + MatrixCoord() { } + + /// Constructs from Coord<2> + CUTLASS_HOST_DEVICE + MatrixCoord(Coord<2, Index> const &coord): Base(coord) { } + + /// Helper to construct from a row and column + CUTLASS_HOST_DEVICE + MatrixCoord(Index row, Index column): Base(make_Coord(row, column)) { } + + /// Returns the row of the coordinate + CUTLASS_HOST_DEVICE + Index const & row() const { return this->at(kRow); } + + /// Returns the row of the coordinate + CUTLASS_HOST_DEVICE + Index & row() { return this->at(kRow); } + + /// Returns the column of the coordinate + CUTLASS_HOST_DEVICE + Index const & column() const { return this->at(kColumn); } + + /// Returns the column of the coordinate + CUTLASS_HOST_DEVICE + Index & column() { return this->at(kColumn); } + + // + // Coord operators + // + + /// Element-wise addition + CUTLASS_HOST_DEVICE + MatrixCoord operator+(Base const& b) const { + return MatrixCoord(Base::operator+(b)); + } + + /// Element-wise subtraction + CUTLASS_HOST_DEVICE + MatrixCoord operator-(Base const& b) const { + return MatrixCoord(Base::operator-(b)); + } + + /// Element-wise multiplication + CUTLASS_HOST_DEVICE + MatrixCoord operator*(Base const& b) const { + return MatrixCoord(Base::operator*(b)); + } + + /// Element-wise division + CUTLASS_HOST_DEVICE + MatrixCoord operator/(Base const& b) const { + return MatrixCoord(Base::operator/(b)); + } + + /// In-place addition + CUTLASS_HOST_DEVICE + MatrixCoord& operator+=(Base const& b) { + Base::operator+=(b); + return *this; + } + + /// In-place subtraction + CUTLASS_HOST_DEVICE + MatrixCoord& operator-=(Base const& b) { + Base::operator-=(b); + return *this; + } + + /// 
In-place multiplication + CUTLASS_HOST_DEVICE + MatrixCoord& operator*=(Base const& b) { + Base::operator*=(b); + return *this; + } + + /// In-place division + CUTLASS_HOST_DEVICE + MatrixCoord& operator/=(Base const& b) { + Base::operator/=(b); + return *this; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Defines data layouts of various matrix formats usable by TensorRef and other classes. +// +// The following define classes satisfying the TensorRefMapFunc concept. These must support the +// following operations, where func is an instance of type TensorRefMapFunc. +// +// Coord = func(Coord); +// +// Though not required to be usable by TensorRef, each of the following also define a helper +// function to map the "leading dimension" to an appropriate stride vector. Implementations +// following this convention should also implement the following static method: +// +// Coord stride = TensorRefMapFunc::stride(leading_dim); +// struct MatrixLayout { + + /// Enumeration defining fundamental contiguous layouts. enum Kind { kRowMajor, kColumnMajor }; + + // + // TensorRefMapFunc definitions for common layouts + // + + /// Mapping function for row-major matrices + struct RowMajor { + static int const kStorageRank = 2; + /// Maps (i, j) to (i, j) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return coord; + } + }; + + /// Mapping function for column-major matrices + struct ColumnMajor { + static int const kStorageRank = 2; + /// Maps (i, j) to (j, i) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return make_Coord(coord.column(), coord.row()); + } + }; + + /// Mapping function for interleaved matrices. Matrix is structured + /// as row-major arrangement of fixed-size columns. 
+ template + struct RowMajorInterleaved { + + /// Rank of storage n-D array + static int const kStorageRank = 3; + + /// Interleaving size + static int const kInterleave = Interleave; + + /// Maps (row, col) to (row, col, row) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return make_Coord( + coord.row() / kInterleave, + coord.column(), + coord.row() % kInterleave + ); + } + + /// Helper to compute stride vector from leading dimension + CUTLASS_HOST_DEVICE + static Coord stride(int ldm) { + return make_Coord( + ldm * kInterleave, + kInterleave, + 1 + ); + } + }; + + /// Mapping function for interleaved matrices. Matrix is structured + /// as column-major arrangement of fixed-size rows. + template + struct ColumnMajorInterleaved { + + /// Rank of storage n-D array + static int const kStorageRank = 3; + + /// Interleaving size + static int const kInterleave = Interleave; + + /// Maps (row, col) to (col, row, col) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return make_Coord( + coord.column() / kInterleave, + coord.row(), + coord.column() % kInterleave + ); + } + + /// Helper to compute stride vector from leading dimension + CUTLASS_HOST_DEVICE + static Coord stride(int ldm) { + return make_Coord( + ldm * kInterleave, + kInterleave, + 1 + ); + } + }; + + /// Mapping function for scenario in which layout is row-major or column-major but this information + /// is only available at runtime. + struct ContiguousLayout { + /// Arbitrary storage rank + static int const kStorageRank = 3; + + /// Dimension of rows + static int const kRow = 0; + + /// Dimension of columns + static int const kColumn = 1; + + /// Mapping function defined by runtime variable. 
Returns coordinates in n-D storage array + /// as (matrix row, matrix colum, 0) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return make_Coord(coord.row(), coord.column(), 0); + } + + /// Helper to construct a stride vector based on contiguous matrix layout and leading dimension + CUTLASS_HOST_DEVICE + static Coord stride(MatrixLayout::Kind layout, int ldm) { + if (layout == MatrixLayout::kRowMajor) { + return make_Coord(ldm, 1, 1); + } + return make_Coord(1, ldm, 1); + } + }; + + /// Mapping function for block-linear matrices. Matrix is structured + /// as column-major arrangement of 2D tiles (that are column-major). + template + struct ColumnMajorBlockLinear { + + /// Rank of storage n-D array + static int const kStorageRank = 4; + + /// Interleaving size in rows dimension + static int const kBlockRows = BlockRows; + + /// Interleaving size in columns dimension + static int const kBlockColumns = BlockColumns; + + /// Maps (row, col) to (col, row, col, row) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return make_Coord( + coord.column() / kBlockColumns, + coord.row() / kBlockRows, + coord.column() % kBlockColumns, + coord.row() % kBlockRows + ); + } + + /// Helper to compute stride vector from leading dimension + CUTLASS_HOST_DEVICE + static Coord stride(int ldm) { + return make_Coord( + ldm * kBlockRows * kBlockColumns, + kBlockRows * kBlockColumns, + kBlockRows, + 1 + ); + } + }; + + /// Mapping function for block-linear matrices. 
Matrix is structured + /// as row-major arrangement of 2D tiles (that are row-major) + template + struct RowMajorBlockLinear { + + /// Rank of storage n-D array + static int const kStorageRank = 4; + + /// Interleaving size in rows dimension + static int const kBlockRows = BlockRows; + + /// Interleaving size in columns dimension + static int const kBlockColumns = BlockColumns; + + /// Maps (row, col) to (row, col, row, col) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return make_Coord( + coord.row() / kBlockRows, + coord.column() / kBlockColumns, + coord.row() % kBlockRows, + coord.column() % kBlockColumns + ); + } + + /// Helper to compute stride vector from leading dimension + CUTLASS_HOST_DEVICE + static Coord stride(int ldm) { + return make_Coord( + ldm * kBlockRows * kBlockColumns, + kBlockRows * kBlockColumns, + kBlockColumns, + 1 + ); + } + }; }; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -45,4 +359,14 @@ struct GemmOperand { //////////////////////////////////////////////////////////////////////////////////////////////////// +/// Transformation applied to matrix operands +struct MatrixTransform { + enum Kind { + kNone, /// no operation + kConjugate, /// conjugate + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + } // namespace cutlass diff --git a/cutlass/predicate_vector.h b/cutlass/predicate_vector.h index 81668577e..4a37d017d 100644 --- a/cutlass/predicate_vector.h +++ b/cutlass/predicate_vector.h @@ -28,12 +28,13 @@ */ #pragma once +#include #include -#include -#include +#include "cutlass/cutlass.h" +#include "cutlass/shape.h" -#include +#include "cutlass/util/platform.h" namespace cutlass { @@ -114,7 +115,7 @@ struct PredicateVector { // Make sure no one tries to put more than 8 bits in a byte :) static_assert(kPredicatesPerByte <= 8, "kPredicatesPerByte must fit within an actual byte"); // Make 
sure the "offsetted" bits fit in one byte. - static_assert(kPredicateStart + kPredicatesPerByte < 8, + static_assert(kPredicateStart + kPredicatesPerByte <= 8, "The offsetted predicates must fit within an actual byte."); /// Storage type of individual elements diff --git a/cutlass/reshape_tile.h b/cutlass/reshape_tile.h index 55aebfcaf..67faa602a 100644 --- a/cutlass/reshape_tile.h +++ b/cutlass/reshape_tile.h @@ -27,7 +27,7 @@ */ #pragma once -#include +#include "cutlass/shape.h" namespace cutlass { diff --git a/cutlass/shape.h b/cutlass/shape.h index 4f6b222ee..b8c0c66f3 100644 --- a/cutlass/shape.h +++ b/cutlass/shape.h @@ -27,7 +27,7 @@ */ #pragma once -#include +#include "cutlass/cutlass.h" namespace cutlass { @@ -128,6 +128,17 @@ struct ShapeDiv { //////////////////////////////////////////////////////////////////////////////////////////////////// +template +struct ShapeDivCeiling { + typedef Shape<(A_::kD + B_::kD - 1) / B_::kD, + (A_::kH + B_::kH - 1) / B_::kH, + (A_::kW + B_::kW - 1) / B_::kW, + (A_::kC + B_::kC - 1) / B_::kC> + Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + template struct ShapeMax { typedef Shape<(A_::kD > B_::kD ? 
A_::kD : B_::kD), @@ -150,12 +161,12 @@ struct ShapeMin { //////////////////////////////////////////////////////////////////////////////////////////////////// -template +template struct ShapeStrides { typedef Shape + elementsPerAccess> Shape; }; @@ -167,7 +178,7 @@ struct ShapeStrides { */ template struct ComputeOffsetFromShape { - static CUTLASS_DEVICE int get(int d, int h, int w, int c) { + static CUTLASS_HOST_DEVICE int get(int d, int h, int w, int c) { // clang-format off return d * Shape_::kH * Shape_::kW * Shape_::kC + h * Shape_::kW * Shape_::kC + @@ -179,73 +190,19 @@ struct ComputeOffsetFromShape { //////////////////////////////////////////////////////////////////////////////////////////////////// -/** -* @brief Compute the offset for the given coordinates in a cube with a depth of 1 -* @tparam kSh Elements in the H dimension -* @tparam kSw Elements in the W dimension -* @tparam kSc Separation between two elements in "elements" -*/ -template -struct ComputeOffsetFromShape > { - static CUTLASS_DEVICE int get(int d, int h, int w, int c) { - return h * kSw_ * kSc_ + w * kSc_ + c; - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -* @brief Compute the offset for the given coordinates in a cube with one channel and a depth of 1 -* @tparam kSh Elements in the H dimension -* @tparam kSw Elements in the W dimension -*/ -template -struct ComputeOffsetFromShape > { - static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * kSw_ + w; } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - /** * @brief Compute the offset for the given coordinates in a cube * @tparam A \ref layout_concept where each dimension of the cube specifies the corresponding stride. 
*/ template struct ComputeOffsetFromStrides { - static CUTLASS_DEVICE int get(int d, int h, int w, int c) { + static CUTLASS_HOST_DEVICE int get(int d, int h, int w, int c) { return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC; } }; //////////////////////////////////////////////////////////////////////////////////////////////////// -/** -* @brief Compute the offset for the given coordinates in a cube with a depth of 1 -* @tparam S_h Stride in the H dimension in scalars -* @tparam S_w Stride in the W dimension in scalars -* @tparam S_c Stride between two scalars. -*/ -template -struct ComputeOffsetFromStrides > { - static CUTLASS_DEVICE int get(int d, int h, int w, int c) { - return h * S_h_ + w * S_w_ + c * S_c_; - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -* @brief Compute the offset for the given coordinates in a cube with one channel and a depth of 1 -* @tparam S_h Stride in the H dimension in scalars -* @tparam S_w Stride in the W dimension in scalars -*/ -template -struct ComputeOffsetFromStrides > { - static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * S_h_ + w * S_w_; } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - /** * @brief Decompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. 
* Afterwards compute the offset of those coordinates using Strides_ diff --git a/cutlass/tensor_ref.h b/cutlass/tensor_ref.h index 8ef31e3b8..09134190c 100644 --- a/cutlass/tensor_ref.h +++ b/cutlass/tensor_ref.h @@ -27,125 +27,613 @@ */ #pragma once -#include - -#include -#include -#include +#include "cutlass/coord.h" +#include "cutlass/cutlass.h" +#include "cutlass/vector.h" namespace cutlass { -//////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// -/// Structure modeling a pointer and stride into a tensor -template +/// Default mapping function from coordinates in a tensor's index space into the n-D array held +/// in memory. Assumes StorageRank = Rank +template +struct IdentityTensorMapFunc { + static int const kStorageRank = Rank; + CUTLASS_HOST_DEVICE + Coord operator()(Coord const &coord) const { + return coord; + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/* \brief Structure modeling a pointer and stride into a tensor. + + A tensor consists of an index space with Rank_ dimensions. It is stored in memory modeled + as an n-D array, where n = StorageRank_. A mapping function maps the logical coordinates of the + tensor's index space into the n-D array, and a stride vector maps the n-D array to linear memory. + + CUTLASS requires the n-D array's least significant, "fastest changing" dimension to + be contiguous in memory. It therefore has a stride of 1 and is not stored. Construction is offered + from vectors of full StorageRank and of the 'compact' rank, though it is in error to construct + with the least significant stride != 1. + + The requirement that the least significant dimension be consecutive enables numerous optimizations + and assumptions about vectorizing memory accesses throughout CUTLASS. 
It also matches various + BLAS conventions in which only the "leading dimension" or most significant stride of a rank=2 + matrix is provided. + + This does affect the ability of constructing arbitrary "sparse" 2-D matrices in memory where all + stride elements are > 1. This can be overcome by defining a custom mapping function and a + StorageRank of 3 or more. + + + Examples: + + (These examples use helpers for matrix layouts defined in cutlass/matrix_traits.h) + + 1. Column-major matrix may be represented as a rank=2 tensor: + + TensorRef A(ptr_A, make_Coord(ldm, 1)); + + 2. Row-major matrix may be represented as a rank=2 tensor: + + TensorRef B(ptr_A, ldm); + + 3. An interleaved matrix may be represented as a rank=2 tensor: + + TensorRef > C; + + 4. Defining a sparse matrix with arbitrary strides in each dimension + + struct ContiguousLayout { + + /// Arbitrary storage rank + static int const kStorageRank = 3; + + /// Mapping function defined by runtime stride configuration + CUTLASS_HOST_DEVICE + Coord<3> operator()(MatrixCoord const &coord) const { + return make_Coord(coord.row(), coord.column(), 0); + } + }; + + typedef TensorRef ContiguousTensorRef; + + // Construct the TensorRef object from a pair of stride values + ContiguousTensorRef D(ptr_D, make_Coord(row_stride, column_stride)); + + + 5. A helper exists to define a TensorRef for a contiguous matrix whose layout + is not known at compile time. 
+ + MatrixLayout::Kind layout; // Could be MatrixLayout::kRowMajor or MatrixLayout::kColumnMajor + int ldm; // leading dimension + + ContiguousTensorRef E(ptr_E, ContiguousLayout::stride(layout, ldm)); + +*/ +template < + /// Data type of element stored within tensor + typename Storage_, + /// Rank of logical tensor + int Rank_, + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc_ = IdentityTensorMapFunc, + /// Rank of internal n-D array + int StorageRank_ = MapFunc_::kStorageRank, + /// Index type used for coordinates + typename Index_ = int, + /// Index type used for offsets and pointer differences + typename LongIndex_ = long long +> class TensorRef { public: /// Data type of individual access typedef Storage_ Storage; - /// Rank of tensor - static int const Rank = Rank_; + /// Logical rank of tensor index space + static int const kRank = Rank_; + + /// Mapping function from logical coordinate to internal n-D array + typedef MapFunc_ MapFunc; + + /// Rank of internal storage + static int const kStorageRank = StorageRank_; + + /// Index type + typedef Index_ Index; + + /// Typically, strides in memory can be very large + typedef LongIndex_ LongIndex; + + /// Coordinate in logical tensor space + typedef Coord TensorCoord; + + /// Coordinate in storage n-D array + typedef Coord StorageCoord; + + /// Stride vector in storage coordinage space - assumes least significant stride + /// is 1 and does not store it. + typedef Coord StrideVector; + + /// Tensor reference to of constant value + typedef TensorRef< + typename platform::remove_const::type const, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> ConstTensorRef; + + /// Require at least rank=1. Mathematically, a rank=0 tensor would be considered to be a + /// scalar, but degenerate cases such as these are difficult to accommodate without + /// extensive C++ metaprogramming or support for zero-length arrays. 
+ static_assert(kRank > 0, "Cannot define a zero-rank TensorRef"); + + // + // Definitions included for backwards compatibility - to be removed in next major release + // + + /// Coordinate in logical tensor space + typedef TensorCoord Coord_t; + + /// Logical rank of tensor index space + static int const Rank = kRank; private: - // - // Data members - // - /// Pointer to storage element + /// Pointer Storage* ptr_; - /// Stride information - Coord stride_; + /// Stride vector - fastest-changing stride assumed to be 1 and not stored + StrideVector stride_; + + /// Maps a logical coordinate to an n-D array's tensor space + MapFunc coord_map_; public: + // // Methods // - /// Default ctor + /// Helper for 1-D memory. All higher ranks are projected onto the fastest changing rank. CUTLASS_HOST_DEVICE - TensorRef() : ptr_(nullptr) {} + TensorRef(Storage *ptr = nullptr): ptr_(ptr) { + for (int i = 0; i < kStorageRank - 1; ++i) { + stride_[i] = 1; + } + } - /// Constructs from a pointer, size, and stride + /// Helper to construct from a pointer and single stride element for 2-D pitch linear memory. + // Higher ranks are projected onto the fastest-changing rank. CUTLASS_HOST_DEVICE - TensorRef(Storage* ptr, Coord stride) : ptr_(ptr), stride_(stride) {} + TensorRef(Storage* ptr, Index ldm) { + ptr_ = ptr; + for (int i = 0; i < kStorageRank - 1; ++i) { + stride_[i] = ldm; + } + } + + /// Constructs from a single pointer and stride vector + CUTLASS_HOST_DEVICE + TensorRef(Storage* ptr, StrideVector const& stride) : ptr_(ptr), stride_(stride) { + + } + + /// Constructs from a pointer and a stride vector of size kRank. If fastest changing + /// stride is not 1, construction fails and subsequent calls to good() will return false. 
+ CUTLASS_HOST_DEVICE + TensorRef(Storage* ptr, StorageCoord const& stride) { + // Fastest-changing stride must be one + if (stride.at(kStorageRank - 1) == 1) { + ptr_ = ptr; + for (int i = 0; i < kStorageRank - 1; ++i) { + stride_[i] = stride[i]; + } + } + else { + // Fastest-chaning stride must be 1. + reset(); + } + } + + /// Enables conversion from TensorRef of non-const type + CUTLASS_HOST_DEVICE + TensorRef( + TensorRef< + typename platform::remove_const::type, + kRank, + MapFunc, + kStorageRank, + Index, + LongIndex> const &ref + ): + ptr_(ref.data()) { + for (int i = 0; i < kStorageRank - 1; ++i) { + stride_[i] = ref.stride(i); + } + } + + /// Returns a reference to constant-valued tensor + CUTLASS_HOST_DEVICE + ConstTensorRef const_ref() const { + return ConstTensorRef(*this); + } + + /// Updates only the pointer + CUTLASS_HOST_DEVICE + void reset(Storage* ptr = nullptr) { + ptr_ = ptr; + } /// Updates the pointer, stride, and location within a TensorRef CUTLASS_HOST_DEVICE - void reset(Storage* ptr = nullptr, Coord stride = Coord(0)) { - ptr_ = ptr; - stride_ = stride; - } - - /// Conversion function - template - TensorRef convert() { - Coord converted_stride; - for (int i = 0; i < Rank - 1; ++i) { - converted_stride[i] = stride_[i] * Extent::kValue / Extent::kValue; + void reset(Storage* ptr, StorageCoord const & stride) { + // Fastest-changing stride must be one + if (stride.at(kStorageRank - 1) == 1) { + ptr_ = ptr; + for (int i = 0; i < kStorageRank - 1; ++i) { + stride_[i] = stride[i]; + } + } + else { + // Fastest-changing stride must be 1 - this is an error. 
+ reset(); } - converted_stride[Rank - 1] = stride_[Rank - 1]; - - return TensorRef(reinterpret_cast(ptr_), converted_stride); } /// Returns true if the TensorRef may be safely accessed CUTLASS_HOST_DEVICE - bool good() const { return ptr_ != nullptr; } + bool good() const { + return ptr_ != nullptr; + } /// Returns the pointer to referenced data CUTLASS_HOST_DEVICE - Storage* data() const { return ptr_; } + Storage * data() const { return ptr_; } /// Returns the stride of the tensor CUTLASS_HOST_DEVICE - Coord const& stride() const { return stride_; } + StorageCoord stride() const { + StorageCoord ld; + for (int i = 0; i < kStorageRank - 1; ++i) { + ld[i] = stride_[i]; + } + ld[kStorageRank - 1] = 1; + return ld; + } /// Returns the stride of the tensor in the given dimension CUTLASS_HOST_DEVICE - int const& stride(int dim) const { return stride_.at(dim); } + Index stride(int dim) const { + // fastest-changing stride assumbed to be 1 + if (dim + 1 >= kStorageRank) { + return 1; + } + return stride_.at(dim); + } /// Returns the maximum stride element as the 'leading dimension' CUTLASS_HOST_DEVICE - int leading_dim() const { return __NV_STD_MAX(stride_[1], stride_[2]); } + Index leading_dim(int idx = 0) const { return stride(idx); } + + /// Maps a logical coordinate to an n-D array in memory + CUTLASS_HOST_DEVICE + StorageCoord map(TensorCoord const &coord) const { + return coord_map_(coord); + } /// Computes the offset of an index from the origin of the tensor CUTLASS_HOST_DEVICE - long long offset(Coord const& coord) const { - return stride_.template dot(coord); + LongIndex offset(TensorCoord const& coord) const { + return stride().template dot(map(coord)); } /// Returns a reference to the element at a given Coord CUTLASS_HOST_DEVICE - Storage& at(Coord const& coord) const { return ptr_[offset(coord)]; } + Storage& at(TensorCoord const& coord) const { + return ptr_[offset(coord)]; + } - /// Element-wise accessor - Storage& operator[](Coord const& coord) const { 
return at(coord); } + /// Returns a reference to the element at a given linear index + CUTLASS_HOST_DEVICE + Storage& at(LongIndex idx) const { return ptr_[idx]; } /// Returns a reference to the element at a given Coord CUTLASS_HOST_DEVICE - Storage& at(int idx) const { return ptr_[idx]; } + Storage& operator[](TensorCoord const& coord) const { + return ptr_[offset(coord)]; + } - /// Element-wise accessor - Storage& operator[](int idx) const { return at(idx); } - - /// Adds an offset to the pointer + /// Returns a reference to the element at a given linear index CUTLASS_HOST_DEVICE - TensorRef& advance(Coord const& b) { - ptr_ += offset(b); + Storage& operator[](LongIndex idx) const { return ptr_[idx]; } + + /// Adds an offset to each pointer + CUTLASS_HOST_DEVICE + TensorRef & add_pointer_offset(LongIndex delta) { + ptr_ += delta; return *this; } /// Returns a TensorRef offset by a given amount CUTLASS_HOST_DEVICE - TensorRef operator+(Coord const& b) const { return TensorRef(ptr_ + offset(b), stride_); } + TensorRef operator+(TensorCoord const& b) const { + TensorRef result(*this); + result.add_pointer_offset(offset(b)); + return result; + } /// Returns a TensorRef offset by a given amount CUTLASS_HOST_DEVICE - TensorRef operator-(Coord const& b) const { return TensorRef(ptr_ - offset(b), stride_); } + TensorRef& operator+=(TensorCoord const& b) { + add_pointer_offset(offset(b)); + return *this; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef operator-(TensorCoord const& b) const { + TensorRef result(*this); + result.add_pointer_offset(-offset(b)); + return result; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef& operator-=(TensorCoord const& b) { + add_pointer_offset(-offset(b)); + return *this; + } }; -//////////////////////////////////////////////////////////////////////////////////////////////////// 
+/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Partial specializations to handle degenerate cases. +// +/////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace cutlass +/// Specialization for rank=1 case with no internal StrideVector +template < + /// Data type of element stored within tensor + typename Storage_, + /// Rank of logical tensor + int Rank_, + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc_, + /// Index type used for coordinates + typename Index_, + /// Index type used for offsets and pointer differences + typename LongIndex_ +> +class TensorRef { + public: + /// Data type of individual access + typedef Storage_ Storage; + + /// Logical rank of tensor index space + static int const kRank = Rank_; + + /// Mapping function from logical coordinate to internal n-D array + typedef MapFunc_ MapFunc; + + /// Rank of internal storage + static int const kStorageRank = 1; + + /// Index type + typedef Index_ Index; + + /// Typically, strides in memory can be very large + typedef LongIndex_ LongIndex; + + /// Coordinate in logical tensor space + typedef Coord TensorCoord; + + /// Coordinate in storage n-D array + typedef Coord StorageCoord; + + /// Stride vector in storage coordinage space - assumes least significant stride + /// is 1 and does not store it. 
+ struct StrideVector { }; + + /// Tensor reference to of constant value + typedef TensorRef< + typename platform::remove_const::type const, + Rank_, + MapFunc_, + kStorageRank, + Index_, + LongIndex_> ConstTensorRef; + + // + // Definitions included for backwards compatibility - to be removed in next major release + // + + /// Coordinate in logical tensor space + typedef TensorCoord Coord_t; + + /// Logical rank of tensor index space + static int const Rank = kRank; + + private: + + /// Pointer + Storage* ptr_; + + /// Maps a logical coordinate to an n-D array's tensor space + MapFunc coord_map_; + + public: + + // + // Methods + // + + /// Helper for 1-D memory. All higher ranks are projected onto the fastest changing rank. + CUTLASS_HOST_DEVICE + TensorRef(Storage *ptr = nullptr): ptr_(ptr) { } + + /// Constructs from a single pointer and stride vector + CUTLASS_HOST_DEVICE + TensorRef(Storage* ptr, StrideVector const& stride) : ptr_(ptr) { + + } + + /// Constructs from a pointer and a stride vector of size kRank. If fastest changing + /// stride is not 1, construction fails and subsequent calls to good() will return false. + CUTLASS_HOST_DEVICE + TensorRef(Storage* ptr, StorageCoord const& stride) { + // Fastest-changing stride must be one + if (stride.at(kStorageRank - 1) == 1) { + ptr_ = ptr; + } + else { + // Fastest-chaning stride must be 1. 
+ reset(); + } + } + + /// Enables conversion from TensorRef of non-const type + CUTLASS_HOST_DEVICE + TensorRef( + TensorRef< + typename platform::remove_const::type, + kRank, + MapFunc, + kStorageRank, + Index, + LongIndex> const &ref + ): + ptr_(ref.data()) { + } + + /// Returns a reference to constant-valued tensor + CUTLASS_HOST_DEVICE + ConstTensorRef const_ref() const { + return ConstTensorRef(*this); + } + + /// Updates only the pointer + CUTLASS_HOST_DEVICE + void reset(Storage* ptr = nullptr) { + ptr_ = ptr; + } + + /// Updates the pointer, stride, and location within a TensorRef + CUTLASS_HOST_DEVICE + void reset(Storage* ptr, StorageCoord const & stride) { + // Fastest-changing stride must be one + if (stride.at(kStorageRank - 1) == 1) { + ptr_ = ptr; + } + else { + // Fastest-changing stride must be 1 - this is an error. + reset(); + } + } + + /// Returns true if the TensorRef may be safely accessed + CUTLASS_HOST_DEVICE + bool good() const { + return ptr_ != nullptr; + } + + /// Returns the pointer to referenced data + CUTLASS_HOST_DEVICE + Storage * data() const { return ptr_; } + + /// Returns the stride of the tensor + CUTLASS_HOST_DEVICE + StorageCoord stride() const { + StorageCoord ld; + ld[kStorageRank - 1] = 1; + return ld; + } + + /// Returns the stride of the tensor in the given dimension + CUTLASS_HOST_DEVICE + Index stride(int dim) const { + // fastest-changing stride assumbed to be 1 + return 1; + } + + /// Returns the maximum stride element as the 'leading dimension' + CUTLASS_HOST_DEVICE + Index leading_dim(int idx = 0) const { return 1; } + + /// Maps a logical coordinate to an n-D array in memory + CUTLASS_HOST_DEVICE + StorageCoord map(TensorCoord const &coord) const { + return coord_map_(coord); + } + + /// Computes the offset of an index from the origin of the tensor + CUTLASS_HOST_DEVICE + LongIndex offset(TensorCoord const& coord) const { + return stride().template dot(map(coord)); + } + + /// Returns a reference to the element 
at a given Coord + CUTLASS_HOST_DEVICE + Storage& at(TensorCoord const& coord) const { + return ptr_[offset(coord)]; + } + + /// Returns a reference to the element at a given linear index + CUTLASS_HOST_DEVICE + Storage& at(LongIndex idx) const { return ptr_[idx]; } + + /// Returns a reference to the element at a given Coord + CUTLASS_HOST_DEVICE + Storage& operator[](TensorCoord const& coord) const { + return ptr_[offset(coord)]; + } + + /// Returns a reference to the element at a given linear index + CUTLASS_HOST_DEVICE + Storage& operator[](LongIndex idx) const { return ptr_[idx]; } + + /// Adds an offset to each pointer + CUTLASS_HOST_DEVICE + TensorRef & add_pointer_offset(LongIndex delta) { + ptr_ += delta; + return *this; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef operator+(TensorCoord const& b) const { + TensorRef result(*this); + result.add_pointer_offset(offset(b)); + return result; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef& operator+=(TensorCoord const& b) { + add_pointer_offset(offset(b)); + return *this; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef operator-(TensorCoord const& b) const { + TensorRef result(*this); + result.add_pointer_offset(-offset(b)); + return result; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef& operator-=(TensorCoord const& b) { + add_pointer_offset(-offset(b)); + return *this; + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tensor_ref_collection.h b/cutlass/tensor_ref_collection.h new file mode 100644 index 000000000..b2972e184 --- /dev/null +++ b/cutlass/tensor_ref_collection.h @@ -0,0 +1,420 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA 
CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*! \file
+ \brief Introduces TensorRefCollection concept and defines TensorRefBatchStrided and TensorRefArray.
+*/ + +#pragma once + +#include "cutlass/tensor_ref.h" + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// TensorRefCollection is a concept for storing a logical collection of TensorRef objects. Classes +// satisfying the TensorRefCollection concept must support the following: +// +// // Define storage type +// typedef typename TensorRefCollection::Storage Storage; +// +// // Define a type for offsets in memory +// typedef typename TensorRefCollection::LongIndex LongIndex; +// +// // Define a ConstIterator type satisfying TensorRefIterator +// typedef typename TensorRefCollection::ConstIterator TensorRefIterator; +// +// // Implement a begin() method. +// TensorRefIterator iterator = collection.begin(); +// +// +// TensorRefIterator is a concept for accessing an element in a TensorRefCollection. Classes +// satisfying the TensorRefIterator concept must support the following: +// +// // Define a TensorRef type accessed by the iterator +// typedef typename TensorRefIterator::TensorRef TensorRef; +// +// // Access the TensorRef +// TensorRef ref = *iterator; +// +// // Pre-increment and post-increment +// ++iterator; +// iterator++; +// +// // Pre-decrement and post-decrement +// --iterator; +// iterator--; +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// This satisfies TensorRefCollection and stores a collection of TensorRef objects that +/// have identical strides. TensorRef objects are separated by a linear stride. 
+template < + /// Data type of element stored within tensor + typename Storage_, + /// Rank of logical tensor + int Rank_, + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc_ = IdentityTensorMapFunc, + /// Rank of internal n-D array + int StorageRank_ = MapFunc_::kStorageRank, + /// Index type used for coordinates + typename Index_ = int, + /// Index type used for offsets and pointer differences + typename LongIndex_ = long long +> +struct TensorRefBatchStrided: + public TensorRef { + + // + // Type definitions + // + + /// Underlying TensorRef type + typedef TensorRef Base; + + /// Storage type + typedef typename Base::Storage Storage; + + /// Index type + typedef Index_ Index; + + /// Typically, strides in memory can be very large + typedef LongIndex_ LongIndex; + + /// Coordinate in logical tensor space + typedef Coord TensorCoord; + + /// Tensor reference implied by the TensorRefBatchStrided + typedef Base TensorRef; + + /// Constant iterator over tensors implied by TensorRefBatchStrided + class ConstIterator { + public: + /// TensorRef returned by the iterator + typedef Base TensorRef; + + private: + + /// Reference to the parent TensorBatchRef object + TensorRefBatchStrided const &ref_; + + /// Offset from the base TensorRef pointer + LongIndex offset_; + + public: + + /// Constructs a ConstIterator from a parent TensorRefBatchStrided + CUTLASS_HOST_DEVICE + ConstIterator( + TensorRefBatchStrided const &ref, + LongIndex offset = 0): ref_(ref), offset_(offset) { } + + /// Obtains a TensorRef pointed to by the iterator + CUTLASS_HOST_DEVICE + TensorRef *operator() const { + TensorRef ref(ref_); + ref.add_pointer_offset(offset_); + return ref; + } + + /// Advances the iterator to point to the next tensor + CUTLASS_HOST_DEVICE + ConstIterator &operator++() { + offset_ += ref_.tensor_stride; + return *this; + } + + /// Advances the iterator to point to the next tensor + CUTLASS_HOST_DEVICE + ConstIterator 
operator++(int) { + ConstIterator ret(*this); + offset_ += ref_.tensor_stride; + return ret; + } + + /// Returns an iterator advanced by (idx) amount + CUTLASS_HOST_DEVICE + ConstIterator operator+(Index idx) { + return ConstIterator(ref, offset_ + ref_.tensor_stride * idx); + } + + /// Advances this iterator by (idx) and returns a reference to self + CUTLASS_HOST_DEVICE + ConstIterator &operator+=(Index idx) { + offset_ += ref_.tensor_stride * idx; + return *this; + } + + /// Moves to the previous tensor + CUTLASS_HOST_DEVICE + ConstIterator &operator--() { + offset_ -= ref_.tensor_stride; + return *this; + } + + /// Moves to the previous tensor + CUTLASS_HOST_DEVICE + ConstIterator operator--(int) { + ConstIterator ret(*this); + offset_ -= ref_.tensor_stride; + return ret; + } + + /// Returns an iterator moved forward by (idx) amount + CUTLASS_HOST_DEVICE + ConstIterator operator-(Index idx) { + return ConstIterator(ref_, offset_ - ref_.tensor_stride * idx); + } + + /// Moves this iterator by (idx) and returns a reference to self + CUTLASS_HOST_DEVICE + ConstIterator &operator-=(Index idx) { + offset_ -= ref_.tensor_stride * idx; + return *this; + } + + /// Returns the difference in offset between two iterators + CUTLASS_HOST_DEVICE + Stride operator-(ConstIterator const &it) { + return offset_ - it.offset_; + } + }; + + // + // Data members + // + + /// Stride between tensors + LongIndex tensor_stride; + + // + // Methods + // + + // Default ctor + CUTLASS_HOST_DEVICE + TensorRefBatchStrided(): tensor_stride(0) { } + + // Constructs form a tensor reference and + CUTLASS_HOST_DEVICE + TensorRefBatchStrided(TensorRef const &ref, LongIndex _tensor_stride = 0): + TensorRef(ref), + tensor_stride(_tensor_stride) { } + + /// Gets the pointer offset + CUTLASS_HOST_DEVICE + LongIndex get_pointer_offset(Index idx) const { + return idx * tensor_stride; + } + + // Returns a reference + CUTLASS_HOST_DEVICE + TensorRef at(Index idx) const { + TensorRef ref(*this); + 
ref.add_pointer_offset(get_pointer_offset(idx)); + return ref; + } + + /// Returns an iterator + CUTLASS_HOST_DEVICE + ConstIterator begin() { + return ConstIterator(*this); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// This satisfies TensorRefCollection and stores a collection of TensorRef objects. This is a +/// structure of arrays in that the individual members of the TensorRef are held in distinct arrays. +/// +/// Note, TensorRef maps a logical coordinate space to an n-D array with rank kStorageRank. It +/// maintains a stride vector of similar rank, but the least significant rank is defined to be 1. +/// +/// The least significant stride of 1 is not stored, and therefore the number of stride arrays is +/// kStorageRank - 1. +template < + /// Data type of element stored within tensor + typename Storage_, + /// Rank of logical tensor + int Rank_, + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc_ = IdentityTensorMapFunc, + /// Rank of internal n-D array + int StorageRank_ = MapFunc_::kStorageRank, + /// Index type used for coordinates + typename Index_ = int, + /// Index type used for offsets and pointer differences + typename LongIndex_ = long long +> +struct TensorRefArray { + // + // Type definitions + // + + /// TensorRef type obtained from the TensorRefArray + typedef TensorRef TensorRef; + + /// Element pointed to by the TensorRef + typedef Storage_ Storage; + + /// Index type + typedef Index_ Index; + + /// Typically, strides in memory can be very large + typedef LongIndex_ LongIndex; + + /// Rank of the stride vector + static int const kStorageRank = TensorRef::kStorageRank; + + /// TensorRefIterator over TensorRef objects in TensorRefArray + class ConstIterator { + public: + + /// TensorRef returned by the iterator + typedef Base TensorRef; + + private: + /// Reference to the TensorRefArray + TensorRefArray const &ref_; + + /// Index 
into TensorRefArray + int idx_; + + public: + + /// Constructs a ConstIterator over the TensorRef objects + CUTLASS_HOST_DEVICE + ConstIterator(TensorArrayRef const &ref, int idx = 0): ref_(ref), idx_(idx) { } + + /// Obtains a TensorRef pointed to by this iterator + CUTLASS_HOST_DEVICE + TensorRef *operator() const { + return ref_.reference(idx_); + } + + /// Advances to next TensorRef + CUTLASS_HOST_DEVICE + ConstIterator &operator++() { + ++idx_; + return *this; + } + + /// Advances to next TensorRef + CUTLASS_HOST_DEVICE + ConstIterator operator++(int) { + ConstIterator ret(*this); + idx_ ++; + return ret; + } + + CUTLASS_HOST_DEVICE + ConstIterator operator+(Index idx) { + return ConstIterator(ref_, idx_ + idx); + } + + CUTLASS_HOST_DEVICE + ConstIterator &operator+=(Index idx) { + idx_ += idx; + return *this; + } + + CUTLASS_HOST_DEVICE + ConstIterator &operator--() { + --idx_; + return *this; + } + + /// Advances to next TensorRef + CUTLASS_HOST_DEVICE + ConstIterator operator--(int) { + ConstIterator ret(*this); + --idx_; + return ret; + } + + CUTLASS_HOST_DEVICE + ConstIterator &operator-=(Index idx) { + idx_ -= idx; + return *this; + } + + CUTLASS_HOST_DEVICE + ConstIterator operator-(Index idx) { + return ConstIterator(ref_, idx_ + idx); + } + }; + + // + // Data members + // + + /// Base addresses + Storage **pointers; + + /// Array of strides + Index *strides[kStorageRank - 1]; + + // + // Methods + // + + // Default ctor + CUTLASS_HOST_DEVICE + TensorArrayRef() { } + + // Construct from pointers to arrays to strides + CUTLASS_HOST_DEVICE + TensorArrayRef( + Storage **_pointers, + Index _strides[kStorageRank - 1]): pointers(_pointers) { + + // Copy pointers to strides arrays + for (int i = 0; i < kStorageRank - 1; ++i) { + strides[i] = _strides[i]; + } + } + + // Returns a TensorRef at the given index in the collection + CUTLASS_HOST_DEVICE + TensorRef at(Index idx) const { + Coord stride; + CUTLASS_PRAGMA_UNROLL + for (int i = 0; i < kStorageRank - 1; 
++i) {
+ stride[i] = stride_[idx][i];
+ }
+ return TensorRef(pointers[idx], stride);
+ }
+
+ /// Returns a TensorRefIterator over the TensorRef objects in this collection
+ CUTLASS_HOST_DEVICE
+ ConstIterator begin() {
+ return ConstIterator(*this);
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+} // namespace cutlass
diff --git a/cutlass/tensor_view.h b/cutlass/tensor_view.h
index 89c6bd571..4ef99e027 100644
--- a/cutlass/tensor_view.h
+++ b/cutlass/tensor_view.h
@@ -24,51 +24,110 @@
 **************************************************************************************************/
 /*! \file
 \brief Defines a structure containing strides and a pointer to tensor data.
+
+ TensorView is derived from TensorRef and contributes bounds to the tensor's index space. Thus,
+ it is a complete mathematical object and may be used in tensor algorithms. It is decoupled from
+ data storage and is therefore lightweight and may be embedded in larger tensor objects or
+ memory structures.
+
+ See cutlass/tensor_ref.h for more details about the mapping of the logical tensor index space to
+ linear memory.
*/ #pragma once #include -#include -#include +#include "cutlass/cutlass.h" +#include "cutlass/tensor_ref.h" namespace cutlass { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Host-side reference implementation of tensor operations -template -class TensorView : public TensorRef { +/// Defines a view into a logical tensor +template < + /// Data type of element stored within tensor + typename Storage_, + /// Rank of logical tensor + int Rank_ = 4, + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc_ = IdentityTensorMapFunc, + /// Rank of internal n-D array + int StorageRank_ = MapFunc_::kStorageRank, + /// Index type used for coordinates + typename Index_ = int, + /// Index type used for offsets and pointer differences + typename LongIndex_ = long long +> +class TensorView : public TensorRef { public: - /// Reference and stride - typedef TensorRef Base; + /// Base tensor reference + typedef TensorRef Base; - /// Reference and stride - typedef Base TensorRef_t; + /// Tensor reference to of constant value + typedef TensorRef< + typename platform::remove_const::type const, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> ConstTensorRef; - /// Reference to constant type - typedef TensorRef ConstTensorRef_t; + /// Base tensor reference + typedef Base TensorRef; - /// Rank of tensor - static int const Rank = TensorRef_t::Rank; + /// Storage type + typedef typename Base::Storage Storage; + + /// Index type + typedef typename Base::Index Index; + + /// Coordinate in logical tensor space + typedef typename TensorRef::TensorCoord TensorCoord; + + /// Coordinate in storage n-D array + typedef typename TensorRef::StorageCoord StorageCoord; + + /// Stride vector in storage coordinate space + /// Least significant stride is = 1 and not stored + typedef typename TensorRef::StrideVector StrideVector; + + /// TensorView of constant value + typedef TensorView< + typename 
platform::remove_const::type const, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> ConstTensorView; + + // + // Definitions included for backwards compatibility - to be removed in next major release + // + + /// Coordinate in logical tensor space + typedef TensorCoord Coord_t; + + /// Logical rank of tensor index space + static int const Rank = Base::kRank; /// Type used to compute the offset of an element to the base of a tensor - typedef int Offset_t; + typedef typename Base::LongIndex Offset_t; - /// Coordinate into tensor - typedef Coord Coord_t; + /// Base class + typedef TensorRef TensorRef_t; + + /// TensorRef to const-valued type + typedef typename TensorRef::ConstTensorRef ConstTensorRef_t; private: // // Data members // - /// Pointer to pitch-linear memory - TensorRef_t ref_; - /// Dimensions of coordinate (independent of stride) - Coord_t size_; + TensorCoord size_; public: // @@ -79,91 +138,126 @@ class TensorView : public TensorRef { CUTLASS_HOST_DEVICE TensorView() {} - /// Constructs a Tensor_view from a TensorRef and size + /// Constructs a TensorView from a TensorRef and size CUTLASS_HOST_DEVICE - TensorView(TensorRef_t const& _ref, Coord_t const& _size) : Base(_ref), size_(_size) {} + TensorView(Base const& _ref, TensorCoord const& _size) : Base(_ref), size_(_size) {} - /// Returns true if the Tensor_view is bound to some memory + /// Constructs a TensorView from a pointer, a stride vector, and size CUTLASS_HOST_DEVICE - bool good() const { return ref().good(); } + TensorView( + Storage *ptr, + StrideVector const &stride, + TensorCoord const& size + ): + Base(ptr, stride), size_(size) {} - /// Returns a pointer to data + /// Constructs a TensorView from a pointer, a stride vector, and size CUTLASS_HOST_DEVICE - T* data() const { return ref().data(); } + TensorView( + Storage *ptr, + StorageCoord const &stride, + TensorCoord const& size + ): + Base(ptr, stride), size_(size) {} /// Updates the reference and size of a Tensor_view object 
CUTLASS_HOST_DEVICE - void reset(TensorRef_t const& _ref = TensorRef_t(0), Coord_t const& _size = Coord_t()) { + void reset(Base const& _ref = Base(), TensorCoord const& _size = TensorCoord()) { Base::operator=(_ref); size_ = _size; } - /// Accesses the tensor reference pointing to data + /// Accesses the size CUTLASS_HOST_DEVICE - TensorRef_t& ref() { return *this; } - - /// - CUTLASS_HOST_DEVICE - ConstTensorRef_t const_ref() { return ConstTensorRef_t(data(), stride()); } - - /// Accesses the tensor reference pointing to data - CUTLASS_HOST_DEVICE - TensorRef_t const& ref() const { return *this; } + TensorCoord const& size() const { return size_; } /// Accesses the size CUTLASS_HOST_DEVICE - Coord_t const& size() const { return size_; } - - /// Accesses the size - CUTLASS_HOST_DEVICE - int size(int dim) const { return size_.at(dim); } - - /// Accesses the stride - CUTLASS_HOST_DEVICE - Coord_t const& stride() const { return ref().stride(); } - - /// Accesses the stride - CUTLASS_HOST_DEVICE - int const& stride(int dim) const { return ref().stride(dim); } + Index size(int dim) const { return size_.at(dim); } /// Assigns the Tensor_view CUTLASS_HOST_DEVICE TensorView& operator=(TensorView const& _tensor) { - Base::operator=(_tensor._ref); + Base::operator=(_tensor); size_ = _tensor.size_; return *this; } - /// Returns the index of an element - CUTLASS_HOST_DEVICE - Offset_t offset(Coord_t const& coord) const { return ref().offset(coord); } - /// Determines whether a location is within a tensor CUTLASS_HOST_DEVICE - bool contains(Coord_t const& coord) const { - for (int dim = 0; dim < Rank; ++dim) { - if (coord.at(dim) >= size_.at(dim)) { + bool contains(TensorCoord const& coord) const { + CUTLASS_PRAGMA_UNROLL + for (int dim = 0; dim < Rank_; ++dim) { + if (coord[dim] >= size_[dim]) { return false; } } return true; } - /// Element-wise accessor + /// Returns a TensorRef pointing to the first element of the tensor. 
CUTLASS_HOST_DEVICE - T& at(Coord_t const& coord) const { return ref().at(coord); } + TensorRef ref() const { + return TensorRef(*this); + } - /// Element-wise accessor - T& operator[](Coord const& coord) const { return at(coord); } - - /// Element-wise accessor + /// Returns a TensorRef pointing to the first element of the tensor. CUTLASS_HOST_DEVICE - T& at(Offset_t idx) const { return ref().at(idx); } + ConstTensorRef const_ref() const { + return ConstTensorRef(*this); + } /// Returns a Tensor_view given location and size quantities CUTLASS_HOST_DEVICE - TensorView subview(Coord_t const& location, Coord_t size) const { - return TensorView(ref() + location, size.clamp(size_ - location)); + TensorView subview(TensorCoord const& location, TensorCoord size) const { + return TensorView((*this) + location, size.clamp(size_ - location)); + } + + /// Returns the number of scalar elements needed to store tensor + CUTLASS_HOST_DEVICE + size_t capacity() const { + int max_rank = 0; + + StorageCoord mapped_size(this->map(size())); + + CUTLASS_PRAGMA_UNROLL + for (int i = 0; i < Base::kStorageRank; ++i) { + if (!i || + this->stride(i) * mapped_size[i] > this->stride(max_rank) * mapped_size[max_rank]) { + max_rank = i; + } + } + return this->stride(max_rank) * mapped_size[max_rank]; + } + + /// Returns a TensorView offset by a given amount + CUTLASS_HOST_DEVICE + TensorView operator+(TensorCoord const& b) const { + TensorView result(*this); + result.add_pointer_offset(this->offset(b)); + return result; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorView& operator+=(TensorCoord const& b) { + this->add_pointer_offset(this->offset(b)); + return *this; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorView operator-(TensorCoord const& b) const { + TensorRef result(*this); + result.add_pointer_offset(-this->offset(b)); + return result; + } + + /// Returns a TensorRef offset by a given amount + 
CUTLASS_HOST_DEVICE + TensorView& operator-=(TensorCoord const& b) { + this->add_pointer_offset(-this->offset(b)); + return *this; } }; diff --git a/cutlass/tile_allocation.h b/cutlass/tile_allocation.h new file mode 100644 index 000000000..81db797f9 --- /dev/null +++ b/cutlass/tile_allocation.h @@ -0,0 +1,143 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +/*! \file + \brief Defines a fragment based on a Shape<> template. +*/ +#pragma once + +#include "cutlass/shape.h" +#include "cutlass/fragment.h" +#include "cutlass/tensor_ref.h" +#include "cutlass/zip_tensor_ref.h" + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Class for storing a tile in memory and accessing it through a tensor ref +template +struct TileAllocation { + // + // Type definitions + // + + /// Scalar element + typedef Scalar_ Scalar; + + /// The actual storage (may differ from the scalar type) + typedef typename StorageType::Type Storage; + + /// Size of the allocation in units of scalars + typedef Shape_ Shape; + + /// Strides + typedef typename ShapeStrides::Shape Strides; + + /// Defines the tensor reference for this allocation + typedef TensorRef ConstTensorRef; + + /// Defines the tensor reference for this allocation + typedef TensorRef TensorRef; + + // + // Data members + // + + /// Storage + Storage storage[Shape::kD][Shape::kH][Shape::kW][Shape::kC]; + + // + // Methods + // + + /// Returns a pointer to the raw data + CUTLASS_DEVICE + Scalar *data() { return reinterpret_cast(&storage[0][0][0][0]); } + + /// Returns a const pointer to the raw data + CUTLASS_DEVICE + Scalar const *data() const { return reinterpret_cast(&storage[0][0][0][0]); } + + /// Returns a TensorRef object pointing to the data + CUTLASS_DEVICE + TensorRef reference() { + return TensorRef(data(), make_Coord(Strides::kD, Strides::kH, Strides::kW, Strides::kC)); + } + + /// Returns a TensorRef object pointing to the data + CUTLASS_DEVICE + ConstTensorRef reference() const { + return ConstTensorRef(data(), make_Coord(Strides::kD, Strides::kH, Strides::kW, Strides::kC)); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// 
Manages a pair of tile allocations as if they are one allocation +template +struct ZipTileAllocation { + // + // Type definitions + // + + /// First tensor allocation + typedef First_ First; + + /// Second tensor allocation + typedef Second_ Second; + + /// Defines the tensor reference for this allocation + typedef ZipTensorRef TensorRef; + + /// Defines the tensor reference for this allocation + typedef ZipTensorRef + ConstTensorRef; + + // + // Data members + // + + /// First tensor allocation + First first; + + /// Second tensor allocation + Second second; + + // + // Methods + // + + /// Returns a TensorRef object pointing to the data + CUTLASS_DEVICE + TensorRef reference() { return TensorRef(first.reference(), second.reference()); } + + /// Returns a TensorRef object pointing to the data + CUTLASS_DEVICE + ConstTensorRef reference() const { return ConstTensorRef(first.reference(), second.reference()); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tile_coord.h b/cutlass/tile_coord.h new file mode 100644 index 000000000..b3d809bc3 --- /dev/null +++ b/cutlass/tile_coord.h @@ -0,0 +1,194 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a coordinate used for the CUTLASS 4-D tile structure. +*/ + +#pragma once + +#include "cutlass/coord.h" + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// TileCoord wraps Coord<4, int> to provide a helper for accessing named dimensions. Classes +/// expecting a coordinate in the rank=4 index space of a CUTLASS tile structure should use TileCoord. 
+template +struct TileCoord : public Coord<4, Index_> { + + /// Index type + typedef Index_ Index; + + /// Underlying Coord<4> + typedef Coord<4, Index> Base; + + /// D dimension + static int kD = 0; + + /// H dimension + static int kH = 1; + + /// W dimension + static int kW = 2; + + /// C dimension + static int kC = 3; + + // + // Methods + // + + /// Default ctor + CUTLASS_HOST_DEVICE + TileCoord() { } + + /// Constructs from Coord<3> and infers coord[kC] = 0 + CUTLASS_HOST_DEVICE + TileCoord(Coord<3, Index> const &coord): + Base(make_Coord(coord[0], coord[1], coord[2], 0)) { } + + /// Constructs from Coord<4> + CUTLASS_HOST_DEVICE + TileCoord(Coord<4, Index> const &coord): Base(coord) { } + + /// Constructs from an array of coordinate elements + CUTLASS_HOST_DEVICE + TileCoord(Index coord[4]): Base(coord) { } + + /// Helper to construct from a row and column + CUTLASS_HOST_DEVICE + TileCoord(Index d, Index h, Index w, Index c): Base(make_Coord(d, h, w, c)) { } + + /// Returns the D element of the coordinate + CUTLASS_HOST_DEVICE + Index const & d() const { return this->at(kD); } + + /// Returns the D element of the coordinate + CUTLASS_HOST_DEVICE + Index & d() { return this->at(kD); } + + /// Returns the H element of the coordinate + CUTLASS_HOST_DEVICE + Index const & h() const { return this->at(kH); } + + /// Returns the H element of the coordinate + CUTLASS_HOST_DEVICE + Index & h() { return this->at(kH); } + + /// Returns the W element of the coordinate + CUTLASS_HOST_DEVICE + Index const & w() const { return this->at(kW); } + + /// Returns the W element of the coordinate + CUTLASS_HOST_DEVICE + Index & w() { return this->at(kW); } + + /// Returns the Celement of the coordinate + CUTLASS_HOST_DEVICE + Index const & c() const { return this->at(kC); } + + /// Returns the C element of the coordinate + CUTLASS_HOST_DEVICE + Index & c() { return this->at(kC); } + + /// Gets H and W dimensions as a Coord<2> + CUTLASS_HOST_DEVICE + Coord<2> hw() const { + return 
make_Coord(h(), w()); + } + + /// Gets H, W, and C dimensions as a Coord<3> + CUTLASS_HOST_DEVICE + Coord<3> hwc() const { + return make_Coord(h(), w(), c()); + } + + /// Gets D, H, and W dimensions as a Coord<3> + CUTLASS_HOST_DEVICE + Coord<3> dhw() const { + return make_Coord(d(), h(), w()); + } + + // + // Coord operators + // + + /// Element-wise addition + CUTLASS_HOST_DEVICE + TileCoord operator+(Base const& b) const { + return TileCoord(Base::operator+(b)); + } + + /// Element-wise subtraction + CUTLASS_HOST_DEVICE + TileCoord operator-(Base const& b) const { + return TileCoord(Base::operator-(b)); + } + + /// Element-wise multiplication + CUTLASS_HOST_DEVICE + TileCoord operator*(Base const& b) const { + return TileCoord(Base::operator*(b)); + } + + /// Element-wise division + CUTLASS_HOST_DEVICE + TileCoord operator/(Base const& b) const { + return TileCoord(Base::operator/(b)); + } + + /// In-place addition + CUTLASS_HOST_DEVICE + TileCoord& operator+=(Base const& b) { + Base::operator+=(b); + return *this; + } + + /// In-place subtraction + CUTLASS_HOST_DEVICE + TileCoord& operator-=(Base const& b) { + Base::operator-=(b); + return *this; + } + + /// In-place multiplication + CUTLASS_HOST_DEVICE + TileCoord& operator*=(Base const& b) { + Base::operator*=(b); + return *this; + } + + /// In-place division + CUTLASS_HOST_DEVICE + TileCoord& operator/=(Base const& b) { + Base::operator/=(b); + return *this; + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tile_iterator.h b/cutlass/tile_iterator.h index 5d39c4f80..51e577949 100644 --- a/cutlass/tile_iterator.h +++ b/cutlass/tile_iterator.h @@ -28,10 +28,13 @@ */ #pragma once -#include -#include -#include -#include +#include "cutlass/coord.h" +#include "cutlass/tensor_ref.h" +#include "cutlass/fragment.h" +#include "cutlass/load_store.h" +#include "cutlass/predicate_vector.h" +#include 
"cutlass/vector.h" +#include namespace cutlass { @@ -61,12 +64,6 @@ as a Coord<4>. struct IteratorAdvance { enum Kind { kD, kH, kW }; }; - -/// Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix -struct IteratorFragment { - enum Kind { kScalar, kWmmaMatrix }; -}; - /////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -77,7 +74,7 @@ template + int AccessSize> struct TileTraits { /// Shape of the tile typedef Tile_ Tile; @@ -89,11 +86,52 @@ struct TileTraits { typedef Iterations_ Iterations; /// Functor that returns the logical coordinate of each entity's initial offset in the tile + // + // ThreadOffset should be a functor defined like: + // + // struct ThreadOffsetExample { + // CUTLASS_DEVICE + // Coord<4> operator()() const { + // return make_Coord(0, threadIdx.y, threadIdx.x, 0); + // } + // }; + // typedef ThreadOffset_ ThreadOffset; + + /// Strides for immediate offset computation + typedef Shape<0, 0, 0, 0> ImmediateOffsetStrides; + + /// Access size + static int const kAccessSize = AccessSize; }; /////////////////////////////////////////////////////////////////////////////////////////////////// +/// Functor computing a predicate given the logical position of an access +template +struct RegularTilePredicateFunctor { + typedef Delta_ Delta; + + /// Dimensions of the bounding volume + Coord<3> bounds; + + /// Constructs a predicate functor given the bounds of a tensor + CUTLASS_HOST_DEVICE + RegularTilePredicateFunctor(Coord<3> _bounds) : bounds(_bounds) {} + + /// Computes the predicate given the logical position of an access + CUTLASS_HOST_DEVICE + bool operator()(Coord<3> iteration, Coord<3> offset) const { + return (iteration[0] * Delta::kD + offset[0] < bounds[0]) && + (iteration[1] * Delta::kH + offset[1] < bounds[1]) && + (iteration[2] * Delta::kW + offset[2] < bounds[2]); + } +}; + 
+/////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct DumpType {}; /// Iterator for accessing a stripmined tile in memory template > struct TileIteratorBase { /// concept TileTraits @@ -117,7 +155,7 @@ struct TileIteratorBase { static IteratorAdvance::Kind const kAdvance = Advance_; /// Specifies iterator storage fragment type (Scalar or WmmaMatrix) - static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_; + static FragmentElementType::Kind const kFragmentElementType = FragmentElementType_; /// Source or destination memory space static MemorySpace::Kind const kMemorySpace = MemorySpace; @@ -144,18 +182,19 @@ struct TileIteratorBase { typedef typename Traits::ThreadOffset ThreadOffset; /// The number of scalars accessed per load/store. - static int const kAccessSize = Tile::kC; + static int const kAccessSize = Traits::kAccessSize; /// The elements loaded/store by one instruction. typedef typename Vectorize::Type AccessType; /// The size of storage needed per fragment static int const kFragmentSize = - (kIteratorFragment == IteratorFragment::kWmmaMatrix ? 16 : sizeof(AccessType)); + (kFragmentElementType == FragmentElementType::kWmmaMatrix ? 16 : sizeof(AccessType)); /// The storage. typedef Fragment::kCount, kFragmentSize> Storage; /// The fragment. typedef Fragment::kCount * kAccessSize> Fragment; + /// The fragment iterator. typedef FragmentIterator FragmentIterator; /// The fragment const iterator. 
@@ -172,25 +211,61 @@ struct TileIteratorBase { /// Parameters to the iterator struct Params { - Index stride_d; + + // + // Dat members + // + + long long stride_d; Index stride_h; Index stride_w; - Index inc_d; + long long inc_d; Index inc_h; Index inc_w; - Index inc_advance; + long long inc_advance; + + // + // Methods + // + + /// Constructs params + CUTLASS_HOST_DEVICE + Params() : stride_d(0), stride_h(0), stride_w(0), inc_d(0), inc_h(0), inc_w(0) {} + + /// Constructs params + CUTLASS_HOST_DEVICE + Params(long long _stride_d, + Index _stride_h, + Index _stride_w, + long long _inc_d, + Index _inc_h, + Index _inc_w, + long long _inc_advance) + : stride_d(_stride_d), + stride_h(_stride_h), + stride_w(_stride_w), + inc_d(_inc_d), + inc_h(_inc_h), + inc_w(_inc_w), + inc_advance(_inc_advance) {} + + /// Constructs params with a stride vector + CUTLASS_HOST_DEVICE + Params(Coord<4> const &stride) { + initialize(stride); + } /// Initializes params CUTLASS_HOST_DEVICE - int initialize(Index _stride_d, + int initialize(long long _stride_d, Index _stride_h, Index _stride_w, - Index _inc_d, + long long _inc_d, Index _inc_h, Index _inc_w, - Index _inc_advance) { + long long _inc_advance) { stride_d = _stride_d; stride_h = _stride_h; stride_w = _stride_w; @@ -203,61 +278,79 @@ struct TileIteratorBase { return 0; } + /// Initializes the parameters object from a vector of strides CUTLASS_HOST_DEVICE - int initialize(Index _stride_d, Index _stride_h, Index _stride_w) { + int initialize(Coord<4> const &stride) { + return initialize(stride[0], stride[1], stride[2]); + } + + /// Initializes the parameters object from a vector of strides + CUTLASS_HOST_DEVICE + int initialize(long long _stride_d, Index _stride_h, Index _stride_w) { stride_d = _stride_d; stride_h = _stride_h; stride_w = _stride_w; inc_w = stride_w * Delta::kW; inc_h = stride_h * Delta::kH - stride_w * Delta::kW * (Iterations::kW - 1); + inc_d = stride_d * Delta::kD - stride_h * Delta::kH * (Iterations::kH - 1) - 
+ stride_w * Delta::kW * (Iterations::kW - 1); + + inc_advance = 0; if (kAdvance == IteratorAdvance::kH) { // Advance in the H dimension. - inc_d = 0; + inc_advance = Tile::kH * stride_h; } else if (kAdvance == IteratorAdvance::kW) { // Advance in the W dimension. - inc_d = stride_w * Tile::kW - stride_h * Tile::kH; + inc_advance = Tile::kW * stride_w; + } else { // Advance in the D dimension. - inc_d = stride_d; + inc_advance = Tile::kD * stride_d; } - inc_advance = 0; + inc_advance -= stride_d * Delta::kD * (Iterations::kD - 1) + + stride_h * Delta::kH * (Iterations::kH - 1) + + stride_w * Delta::kW * (Iterations::kW - 1); return 0; } + /// Gotta have this CUTLASS_HOST_DEVICE int initialize() { stride_d = 0; stride_h = 0; stride_w = 1; - inc_d = inc_h = inc_w = inc_advance = 0; + inc_advance = 0; + inc_d = inc_h = inc_w = 0; return 0; } }; /// Is the iterator valid? - CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const { return true; } // // Static function members // /// Initializes a predicate vector - template - CUTLASS_DEVICE static void initialize_predicates(PredicateIterator predicate_it, - Coord<3> const &bounds, - Coord<3> const &offset = make_Coord(0, 0, 0)) { + template + CUTLASS_HOST_DEVICE static void initialize_predicates(PredicateIterator predicate_it, + PredicateFunctor const &predicate_func, + Coord<3> const &offset) { + CUTLASS_PRAGMA_UNROLL for (int d = 0; d < Iterations::kD; ++d) { - bool enable_d = (d * Delta::kD + offset[0] < bounds[0]); + CUTLASS_PRAGMA_UNROLL for (int h = 0; h < Iterations::kH; ++h) { - bool enable_h = (h * Delta::kH + offset[1] < bounds[1]); + CUTLASS_PRAGMA_UNROLL for (int w = 0; w < Iterations::kW; ++w) { - bool enable_w = (w * Tile::kC * Delta::kW + offset[2] < bounds[2]); - predicate_it.set(d, h, w, 0, enable_d && enable_h && enable_w); + bool enable = predicate_func(make_Coord(d, h, w), offset); + predicate_it.set(enable); + 
++predicate_it; } } } @@ -301,7 +394,7 @@ template > struct TileLoadIterator : public TileIteratorBase { /// Base class typedef TileIteratorBase Base; @@ -329,13 +422,13 @@ struct TileLoadIterator : public TileIteratorBase TensorRef; + /// Parameters struct Params : public BaseParams { /// Pointer to memory Scalar const *pointer; + // + // Methods + // + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + Params() : pointer(0){ Base::Params::initialize(); } + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + Params(Scalar const *ptr) : pointer(ptr) { Base::Params::initialize(); } + + /// Constructs with a CompactTensorRef<> + CUTLASS_HOST_DEVICE + Params(TensorRef const &ref): pointer(ref.data()) { + Base::Params::initialize(ref.stride()); + } + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + Params(Scalar const *ptr, + long long _stride_d, + Index _stride_h, + Index _stride_w, + long long _inc_d, + Index _inc_h, + Index _inc_w, + Index _inc_advance) + : pointer(ptr) { + Base::Params::initialize( + _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance); + } + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + Params(Scalar const *ptr, long long stride_d, Index stride_h, Index stride_w) + : pointer(ptr) { + Base::Params::initialize(stride_d, stride_h, stride_w); + } + + /// Initializes params to access a raw pointer + CUTLASS_HOST_DEVICE + int initialize(TensorRef const &ref) { + pointer = ref.data(); + return Base::Params::initialize(ref.stride()); + } + /// Initialize params to access storage object CUTLASS_HOST_DEVICE int initialize(SharedStorage const &storage) { pointer = &storage[0]; + Base::Params::initialize(); + return 0; + } + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + int initialize(Scalar const *ptr) { + pointer = ptr; + Base::Params::initialize(); return 0; } /// Initializes params to access a raw pointer 
CUTLASS_HOST_DEVICE - int initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w) { + int initialize(Scalar const *ptr, long long stride_d, Index stride_h, Index stride_w) { Base::Params::initialize(stride_d, stride_h, stride_w); pointer = ptr; return 0; @@ -411,10 +566,10 @@ struct TileLoadIterator : public TileIteratorBase + /// Initializes a predicate vector using a RegularTilePredicateFunctor + template < + /// Predicate iterator + typename PredicateIterator> CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord<3> const &bounds, Coord<3> const &block_offset = make_Coord(0, @@ -455,8 +612,23 @@ struct TileLoadIterator : public TileIteratorBase(bounds), + block_offset + make_Coord(thread_offset[0], thread_offset[1], thread_offset[2])); + } + + /// Initializes a predicate vector using an arbitrary predicate functor + template < + /// Predicate iterator + typename PredicateIterator, + /// Functor computing predicates + typename PredicateFunctor> + CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, + PredicateFunctor const &functor, + Coord<3> const &block_offset) { + Base::initialize_predicates( + predicate_it, + functor, + block_offset + make_Coord(thread_offset[0], thread_offset[1], thread_offset[2])); } // @@ -475,41 +647,27 @@ struct TileLoadIterator : public TileIteratorBase const &block_offset = make_Coord(0, 0, 0), ThreadOffset thread_offset_func = ThreadOffset()) : stage(0) { - int const offset = thread_offset_func()[2]; - params.pointer = &shared_storage[offset]; - } + params.pointer = ptr + thread_offset_func()[2]; - /// Returns the current pointer - CUTLASS_HOST_DEVICE - Scalar const *data() const { return params.pointer; } + params.stride_d = 0; + params.stride_h = 0; + params.stride_w = 1; - /// The accessor. 
- CUTLASS_DEVICE void get(AccessType &value, int d, int h, int w, int c) const { - int const imm = - ComputeOffsetFromStrides::get(d, h, w, c); - Load::load(value, params.pointer, imm); + params.inc_d = params.inc_h = params.inc_w = params.inc_advance = 0; } /// Increment in the D dimension @@ -524,8 +682,21 @@ struct TileLoadIterator : public TileIteratorBase::get(d, h, w, c); + Load::load(value, params.pointer, offset); + } + /// Increment the stage. - CUTLASS_DEVICE void inc_stage() { + CUTLASS_HOST_DEVICE void inc_stage() { if (Tile::kD > 1) { int const kStageSize = Tile::kH * Tile::kW * Tile::kC; if (stage == Tile::kD - 1) { @@ -538,7 +709,27 @@ struct TileLoadIterator : public TileIteratorBase const &offset) { + long long _offset = offset.template dot( + make_Coord(params.stride_d, params.stride_h, params.stride_w) + ); + + params.pointer += _offset; + return *this; + } + + /// Adds a raw offset to the pointer + CUTLASS_HOST_DEVICE void add_pointer_offset(Index offset) { params.pointer += offset; } + + CUTLASS_HOST_DEVICE Index stride_advance(void) { + Index stride = params.stride_h; + if (kAdvance == IteratorAdvance::kW) { + stride = params.stride_w; + } + return stride; + } + /// Loads a fragment and advances the iterator to the next tile. 
template CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) { @@ -547,11 +738,12 @@ struct TileLoadIterator : public TileIteratorBase::load( - reinterpret_cast(frag_iterator.at(d, h, w, 0)), data(), 0); + for (int c = 0; c < Iterations::kC; ++c) { + if (*pred_it) { + load_element( + reinterpret_cast(frag_iterator.at(d, h, w, c)), d, h, w, c); + } } - if (w < Iterations::kW - 1) { inc_w(); } @@ -587,6 +779,19 @@ struct TileLoadIterator : public TileIteratorBase + CUTLASS_HOST_DEVICE void load(Fragment &fragment, int d) { + FragmentIterator frag_iterator(fragment); + for (int h = 0; h < Iterations::kH; ++h) { + for (int w = 0; w < Iterations::kW; ++w) { + for (int c = 0; c < Iterations::kC; ++c) { + load_element(reinterpret_cast(frag_iterator.at(0, h, w, c)), d, h, w, c); + } + } + } + } }; /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -626,7 +831,7 @@ template > struct TileStoreIterator : public TileIteratorBase { /// Base class typedef TileIteratorBase Base; @@ -660,11 +865,14 @@ struct TileStoreIterator : public TileIteratorBase TensorRef; + /// Parameters struct Params : public BaseParams { /// Pointer to memory Scalar *pointer; + // + // Methods + // + + // Default constructor + CUTLASS_HOST_DEVICE + Params() : pointer(0) {} + + // Default constructor + CUTLASS_HOST_DEVICE + Params(Scalar *ptr) : pointer(ptr) { Base::Params::initialize(); } + + /// Constructs with a CompactTensorRef<> + CUTLASS_HOST_DEVICE + Params(TensorRef const &ref): pointer(ref.data()) { + Base::Params::initialize(ref.stride()); + } + + // Default constructor + CUTLASS_HOST_DEVICE + Params(Scalar *ptr, long long stride_d, Index stride_h, Index stride_w) { + initialize(ptr, stride_d, stride_h, stride_w); + } + + // Default constructor + CUTLASS_HOST_DEVICE + Params(Scalar *ptr, + long long _stride_d, + Index _stride_h, + Index _stride_w, + long long _inc_d, + Index _inc_h, + Index _inc_w, + 
Index _inc_advance) { + initialize(ptr, _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance); + } + /// Initialize params to access storage object CUTLASS_HOST_DEVICE int initialize(SharedStorage &storage) { pointer = &storage[0]; - return 0; + return Base::Params::initialize(); + } + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + int initialize(Scalar *ptr) { + pointer = ptr; + return Base::Params::initialize(); } /// Initializes params to access a raw pointer CUTLASS_HOST_DEVICE - int initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w) { + int initialize(Scalar *ptr, long long stride_d, Index stride_h, Index stride_w) { Base::Params::initialize(stride_d, stride_h, stride_w); pointer = ptr; return 0; @@ -730,10 +988,10 @@ struct TileStoreIterator : public TileIteratorBase + /// Initializes a predicate vector using a RegularTilePredicateFunctor + template < + /// Predicate iterator + typename PredicateIterator> CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord<3> const &bounds, Coord<3> const &block_offset = make_Coord(0, @@ -774,8 +1034,23 @@ struct TileStoreIterator : public TileIteratorBase(bounds), + block_offset + make_Coord(thread_offset[0], thread_offset[1], thread_offset[2])); + } + + /// Initializes a predicate vector using an arbitrary predicate functor + template < + /// Predicate iterator + typename PredicateIterator, + /// Functor computing predicates + typename PredicateFunctor> + CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, + PredicateFunctor const &functor, + Coord<3> const &block_offset) { + Base::initialize_predicates( + predicate_it, + functor, + block_offset + make_Coord(thread_offset[0], thread_offset[1], thread_offset[2])); } // @@ -794,25 +1069,22 @@ struct TileStoreIterator : public TileIteratorBase const &block_offset = make_Coord(0, 0, 0), - ThreadOffset thread_offset_func = ThreadOffset()) + 
TileStoreIterator(Params const &, Scalar *ptr, ThreadOffset thread_offset_func = ThreadOffset()) : stage(0) { - int const offset = thread_offset_func()[2]; - params.pointer = &shared_storage[offset]; - } + params.pointer = ptr + thread_offset_func()[2]; + params.stride_d = 0; + params.stride_h = 0; + params.stride_w = 1; - /// Returns the current pointer - CUTLASS_HOST_DEVICE - Scalar *data() const { return params.pointer; } + params.inc_d = params.inc_h = params.inc_w = params.inc_advance = 0; + } /// Increment in the D dimension CUTLASS_HOST_DEVICE void inc_d() { params.pointer += params.inc_d; } @@ -827,7 +1099,7 @@ struct TileStoreIterator : public TileIteratorBase 1) { int const kStageSize = Tile::kH * Tile::kW * Tile::kC; if (stage == Tile::kD - 1) { @@ -840,25 +1112,43 @@ struct TileStoreIterator : public TileIteratorBase::get(d, h, w, c); - Store::store(value, params.pointer, imm); + /// Adds a vector offset to the iterator + CUTLASS_HOST_DEVICE TileStoreIterator & operator+=(Coord<3> const &offset) { + params.pointer += offset.template dot( + make_Coord(params.stride_d, params.stride_h, params.stride_w) + ); + return *this; + } + + /// Adds a raw offset to the pointer + CUTLASS_HOST_DEVICE void add_pointer_offset(Index offset) { params.pointer += offset; } + + /// Stores a single fragment element into memory. + CUTLASS_HOST_DEVICE void store_element(AccessType const &value, int d, int h, int w, int c) { + int const offset = + ComputeOffsetFromStrides::get(d, h, w, c); + Store::store(value, params.pointer, offset); } - public: /// Stores a fragment and advances to the next tile. 
template - CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment, PredicateIterator pred_it) { - FragmentIterator frag_iterator(fragment); + CUTLASS_HOST_DEVICE void store_post_increment(Fragment const &fragment, PredicateIterator pred_it) { + FragmentConstIterator frag_iterator(fragment); for (int d = 0; d < Iterations::kD; ++d) { for (int h = 0; h < Iterations::kH; ++h) { for (int w = 0; w < Iterations::kW; ++w, ++pred_it) { - if (*pred_it) { - Store::store( - reinterpret_cast(frag_iterator.at(d, h, w, 0)), data(), 0); + for (int c = 0; c < Iterations::kC; ++c) { + if (*pred_it) { + store_element( + reinterpret_cast(frag_iterator.at(d, h, w, c)), d, h, w, c); + } } if (w < Iterations::kW - 1) { inc_w(); @@ -877,23 +1167,103 @@ struct TileStoreIterator : public TileIteratorBase - CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment) { + CUTLASS_HOST_DEVICE void store_post_increment(Fragment const &fragment) { typename PredicateVector::TrivialIterator pred_it; store_post_increment(fragment, pred_it); } /// Stores a fragment without advancing the iterator. template - CUTLASS_HOST_DEVICE void store(Fragment &fragment, PredicateIterator pred_it) const { + CUTLASS_HOST_DEVICE void store(Fragment const &fragment, PredicateIterator pred_it) const { TileStoreIterator _store_it(*this); _store_it.store_post_increment(fragment, pred_it); } /// Stores a fragment without advancing the iterator. template - CUTLASS_HOST_DEVICE void store(Fragment &fragment) const { + CUTLASS_HOST_DEVICE void store(Fragment const &fragment) const { typename PredicateVector::TrivialIterator pred_it; store(fragment, pred_it); } + + /// Loads a single fragment element from memory + CUTLASS_HOST_DEVICE void load_element(AccessType &value, int d, int h, int w, int c) const { + int const offset = + ComputeOffsetFromStrides::get(d, h, w, c); + + Load::load(value, params.pointer, offset); + } + + /// Loads a fragment and advances the iterator to the next tile. 
+ template + CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) { + FragmentIterator frag_iterator(fragment); + + for (int d = 0; d < Iterations::kD; ++d) { + for (int h = 0; h < Iterations::kH; ++h) { + for (int w = 0; w < Iterations::kW; ++w, ++pred_it) { + for (int c = 0; c < Iterations::kC; ++c) { + if (*pred_it) { + load_element( + reinterpret_cast(frag_iterator.at(d, h, w, c)), d, h, w, c); + } + } + if (w < Iterations::kW - 1) { + inc_w(); + } + } + if (h < Iterations::kH - 1) { + inc_h(); + } + } + if (d < Iterations::kD - 1) { + inc_d(); + } + } + inc_advance(); + } + + /// Loads a fragment and advances the iterator to the next tile. + template + CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment) { + typename PredicateVector::TrivialIterator pred_it; + load_post_increment(fragment, pred_it); + } + + /// Loads a fragment without advancing the iterator.. + template + CUTLASS_HOST_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const { + TileStoreIterator _load_it(*this); + _load_it.load_post_increment(fragment, pred_it); + } + + /// Loads a fragment without advancing the iterator.. + template + CUTLASS_HOST_DEVICE void load(Fragment &fragment) const { + typename PredicateVector::TrivialIterator pred_it; + load(fragment, pred_it); + } + + /// Loads a fragment without advancing the iterator.. 
+ template + CUTLASS_HOST_DEVICE void load(Fragment &fragment, int d) { + FragmentIterator frag_iterator(fragment); + for (int h = 0; h < Iterations::kH; ++h) { + for (int w = 0; w < Iterations::kW; ++w) { + for (int c = 0; c < Iterations::kC; ++c) { + load_element(reinterpret_cast(frag_iterator.at(0, h, w, c)), d, h, w, c); + } + } + } + } }; -} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tile_stream.h b/cutlass/tile_stream.h new file mode 100644 index 000000000..7790605a0 --- /dev/null +++ b/cutlass/tile_stream.h @@ -0,0 +1,378 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements the tile stream concept, composing an iterator with a transformation. Offers + split-phase semantics, separating the initiation of an asynchronous memory operation with a + fence forcing it to complete. +*/ +#pragma once + +// clang-format off + +#include "cutlass/convert.h" +#include "cutlass/tile_iterator.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Generic stream for loading and transforming fragments +template > +struct TileLoadStream { + // + // Type definitions + // + + /// TileLoadIterator + typedef Iterator_ Iterator; + + /// Transformer + typedef Transformer_ Transformer; + + /// Fragment fetched from source memory + typedef typename Iterator::Fragment Fragment; + + /// Output fragment from transformer + typedef typename Transformer::OutputFragment TransformedFragment; + + /// Tensor reference expected by the stream + typedef typename Iterator::TensorRef TensorRef; + + /// Empty predicate vector struct + struct PredicateVector {}; + + /// Index type + typedef typename Iterator::Index Index; + + /// Parameters object used to construct generic load stream + struct Params { + /// Parameters to the iterator + typename 
Iterator::Params iterator; + + // + // Methods + // + + /// Default constructor + CUTLASS_HOST_DEVICE + Params() {} + + /// Constructor with iterator params + CUTLASS_HOST_DEVICE + Params(typename Iterator::Params const &_iterator) : iterator(_iterator) {} + }; + + // + // Data members + // + + /// Iterator to load tiles + Iterator iterator; + + /// Fragment loaded via iterator + Fragment fetched_fragment; + + /// Transformation applied to fragments + Transformer transformer; + + /// Transformed fragment from transformer + TransformedFragment transformed_fragment; + + // + // Methods + // + + /// Ctor + CUTLASS_DEVICE + TileLoadStream(Params const &_params, TensorRef const &_ref) + : iterator(_params.iterator, _ref) {} + + /// Ctor + CUTLASS_DEVICE + TileLoadStream(Params const &_params, + Coord<3> const &threadblock_offset = make_Coord(0, 0, 0) + ): iterator(_params.iterator, threadblock_offset) { } + + /// Loads a tile and increments the iterator + CUTLASS_DEVICE + void copy() { iterator.load_post_increment(fetched_fragment); } + + /// Commits the fetched fragment and applies a transformation + CUTLASS_DEVICE + void commit() { transformer.transform(fetched_fragment, transformed_fragment); } + + /// Accesses the loaded, transformed fragment + CUTLASS_DEVICE + Fragment &intermediate_fragment() { return fetched_fragment; } + + /// Accesses the loaded, transformed fragment + CUTLASS_DEVICE + TransformedFragment &fragment() { return transformed_fragment; } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Generic stream for transforming and storing fragments +template > +struct TileStoreStream { + // + // Type definitions + // + + /// TileLoadIterator + typedef Iterator_ Iterator; + + /// Transformer + typedef Transformer_ Transformer; + + /// Source fragment + typedef typename Transformer::InputFragment Fragment; + + /// Transformed fragment, compatible with Iterator::Fragment + typedef typename 
Transformer::OutputFragment TransformedFragment; + + /// Tensor reference expected by the underlying iterator + typedef typename Iterator::TensorRef TensorRef; + + /// Empty predicate vector struct + struct PredicateVector {}; + + /// Index type + typedef typename Iterator::Index Index; + + /// Parameters used to construct the stream + struct Params { + /// Parameters to the iterator + typename Iterator::Params iterator; + + // + // Methods + // + + /// Default constructor + CUTLASS_HOST_DEVICE + Params() {} + + /// Constructor with iterator params + CUTLASS_HOST_DEVICE + Params(typename Iterator::Params const &_iterator) : iterator(_iterator) {} + }; + + // + // Data members + // + + /// Iterator to store tiles + Iterator iterator; + + /// Transformation applied to inputs + Transformer transformer; + + /// Source fragment + Fragment source_fragment; + + /// Transformed fragment from transformer + TransformedFragment transformed_fragment; + + // + // Methods + // + + /// Ctor + CUTLASS_DEVICE + TileStoreStream(Params const &_params, TensorRef const &_ref) + : iterator(_params.iterator, _ref) {} + + /// Ctor + CUTLASS_DEVICE + TileStoreStream(Params const &_params, + Coord<3> const &threadblock_offset = make_Coord(0, 0, 0) + ): iterator(_params.iterator, threadblock_offset) { } + + /// Stores a fragment and increments the iterator + CUTLASS_DEVICE + void copy() { + + transformer.transform(source_fragment, transformed_fragment); + iterator.store_post_increment(transformed_fragment); + } + + /// Stores a fragment and increments the iterator + CUTLASS_DEVICE + void copy(Fragment const &frag) { + source_fragment = frag; + copy(); + } + + /// Commits the store operation + CUTLASS_DEVICE + void commit() {} + + /// Accesses the transformed fragment + CUTLASS_DEVICE + Fragment &fragment() { return source_fragment; } + + /// Accesses the fragment after trasnforming + CUTLASS_DEVICE + TransformedFragment &intermediate_fragment() { return transformed_fragment; } +}; + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Generic stream for loading and transforming fragments +template , + typename Transformer_ = Copy > +struct PredicatedTileLoadStream : public TileLoadStream { + // + // Type definitions + // + + typedef TileLoadStream Base; + + /// TileLoadIterator + typedef Iterator_ Iterator; + + /// Predicate functor + typedef PredicateFunctor_ PredicateFunctor; + + /// Transformer + typedef Transformer_ Transformer; + + /// Fragment fetched from source memory + typedef typename Base::Fragment Fragment; + + /// Output fragment from transformer + typedef typename Base::TransformedFragment TransformedFragment; + + /// Parameters object used to construct generic load stream + typedef typename Base::Params Params; + + // + // Data members + // + + /// Predicates + typename Iterator::PredicateVector predicates; + + // + // Methods + // + + /// Ctor + CUTLASS_DEVICE + PredicatedTileLoadStream(Params const &_params, + Coord<3> const &bounds, + Coord<3> const &threadblock_offset = make_Coord(0, 0, 0)) + : Base(_params, threadblock_offset) { + this->iterator.initialize_predicates( + predicates.begin(), PredicateFunctor(bounds), threadblock_offset); + } + + /// Loads a tile and increments the iterator + CUTLASS_DEVICE + void copy() { this->iterator.load_post_increment(this->fetched_fragment, predicates.begin()); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Generic stream for transforming and storing fragments +template , + typename Transformer_ = Copy > +struct PredicatedTileStoreStream : public TileStoreStream { + // + // Type definitions + // + + typedef TileStoreStream Base; + + /// TileLoadIterator + typedef Iterator_ Iterator; + + /// Predicate functor + typedef PredicateFunctor_ PredicateFunctor; + + /// Transformer + typedef Transformer_ Transformer; + + /// Fragment fetched from source memory + typedef 
typename Base::Fragment Fragment; + + /// Output fragment from transformer + typedef typename Base::TransformedFragment TransformedFragment; + + /// Parameters object used to construct generic load stream + typedef typename Base::Params Params; + + // + // Data members + // + + /// Predicates + typename Iterator::PredicateVector predicates; + + // + // Methods + // + + /// Ctor + CUTLASS_DEVICE + PredicatedTileStoreStream(Params const &_params, + Coord<3> const &bounds, + Coord<3> const &threadblock_offset = make_Coord(0, 0, 0)) + : Base(_params, threadblock_offset) { + this->iterator.initialize_predicates( + predicates.begin(), PredicateFunctor(bounds), threadblock_offset); + } + + /// Stores the fragment and increments the iterator + CUTLASS_DEVICE + void copy() { + this->transformer.transform(this->source_fragment, this->transformed_fragment); + this->iterator.store_post_increment(this->transformed_fragment, predicates.begin()); + } + + /// Stores the fragment and increments the iterator + CUTLASS_DEVICE + void copy(Fragment const &frag) { + this->source_fragment = frag; + copy(); + } + + /// Commits the store operation + CUTLASS_DEVICE + void commit() {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass + +// clang-format on diff --git a/cutlass/tile_traits_standard.h b/cutlass/tile_traits_standard.h index 14ecd01ab..9145c5da9 100644 --- a/cutlass/tile_traits_standard.h +++ b/cutlass/tile_traits_standard.h @@ -28,7 +28,7 @@ */ #pragma once -#include +#include "cutlass/tile_iterator.h" namespace cutlass { @@ -204,6 +204,9 @@ struct TileTraitsStandard { /// Number of participating warps static int const kWarpCount = kThreads / kWarpSize; + /// By default, do not do scalar loads + static int const kAccessSize = 1; + // Static assertions static_assert(!(ShapeCount::kDhw % kThreads), "Tiling undefined if elements not divisible by threads."); @@ -223,8 +226,7 @@ struct 
TileTraitsStandard { typedef typename Traits::Delta Delta; /// Delta between each thread's access - /// TODO MTA this is wrong for sure, but Delta is used for stride computation at the moment - typedef Delta ImmediateOffsetStrides; + typedef Shape<0, 0, 0, 0> ImmediateOffsetStrides; /// Number of accesses typedef typename Traits::Iterations Iterations; diff --git a/cutlass/util/complex.h b/cutlass/util/complex.h new file mode 100644 index 000000000..260a3abd2 --- /dev/null +++ b/cutlass/util/complex.h @@ -0,0 +1,457 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#pragma once + +#include +#include "cutlass/cutlass.h" +#include + +namespace cutlass { +namespace platform { + +////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// Accessors for CUDA complex types +// + +/// Returns the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +CUTLASS_HOST_DEVICE +float const &real(cuFloatComplex const &z) { return z.x; } + +/// Returns the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +CUTLASS_HOST_DEVICE +float &real(cuFloatComplex &z) { return z.x; } + +/// Returns the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +CUTLASS_HOST_DEVICE +double const &real(cuDoubleComplex const &z) { return z.x; } + +/// Returns the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +CUTLASS_HOST_DEVICE +double &real(cuDoubleComplex &z) { return z.x; } + +/// Returns the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate 
complex with a + // host-only type +CUTLASS_HOST_DEVICE +float const &imag(cuFloatComplex const &z) { return z.y; } + +/// Returns the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +CUTLASS_HOST_DEVICE +float &imag(cuFloatComplex &z) { return z.y; } + +/// Returns the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +CUTLASS_HOST_DEVICE +double const &imag(cuDoubleComplex const &z) { return z.y; } + +/// Returns the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +CUTLASS_HOST_DEVICE +double &imag(cuDoubleComplex &z) { return z.y; } + +////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Class for representing and manipulating complex numbers with conversions from built-in CUDA +/// complex types. 
+template +class complex { + public: + /// Type alias for scalar type + typedef T value_type; + + private: + // + // Data members + // + + /// Real part + T _real; + + /// Imaginary part + T _imag; + + public: +// +// Methods +// + +/// Constructor +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + complex(T r = T(0), T i = T(0)) : _real(r), _imag(i) {} + +/// Conversion from cuFloatComplex +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + complex(cuFloatComplex const &z) : _real(platform::real(z)), _imag(platform::imag(z)) {} + +/// Conversion from cuDoubleComplex +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + complex(cuDoubleComplex const &z) : _real(platform::real(z)), _imag(platform::imag(z)) {} + +/// Accesses the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + T const &real() const { return _real; } + +/// Accesses the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + T &real() { return _real; } + +/// Accesses the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + T const &imag() const { return _imag; } + +/// Accesses the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + T &imag() { return _imag; } + +/// Converts to cuFloatComplex +#pragma hd_warning_disable // Suppresses warnings when 
attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + operator cuFloatComplex() const { return make_cuFloatComplex(real(), imag()); } + +/// Converts to cuDoubleComplex +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type + CUTLASS_HOST_DEVICE + operator cuDoubleComplex() const { return make_cuDoubleComplex(real(), imag()); } +}; + +// +// Accessors for complex template +// + +/// Returns the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE T const &real(complex const &z) { + return z.real(); +} + +/// Returns the real part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE T &real(complex &z) { + return z.real(); +} + +/// Returns the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE T const &imag(complex const &z) { + return z.imag(); +} + +/// Returns the imaginary part of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE T &imag(complex &z) { + return z.imag(); +} + +// +// Output operators +// + +template +std::ostream &operator<<(std::ostream &out, complex const &z) { + T _r = real(z); + T _i = imag(z); + return out << _r << "+i" << _i; +} + +// +// Non-member operators defined for complex types +// + +/// Equality operator +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE bool operator==(complex const &lhs, complex const &rhs) { + return real(lhs) == (rhs) && imag(lhs) == 
imag(rhs); +} + +/// Inequality operator +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE bool operator!=(complex const &lhs, complex const &rhs) { + return !(lhs == rhs); +} + +/// Addition +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator+(complex const &lhs, complex const &rhs) { + return complex(real(lhs) + real(rhs), imag(lhs) + imag(rhs)); +} + +/// Subtraction +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator-(complex const &lhs, complex const &rhs) { + return complex(real(lhs) - real(rhs), imag(lhs) - imag(rhs)); +} + +/// Multiplication +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator*(complex const &lhs, complex const &rhs) { + return complex(real(lhs) * real(rhs) - imag(lhs) * imag(rhs), + real(lhs) * imag(rhs) + imag(lhs) * real(rhs)); +} + +/// Scalar Multiplication +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator*(complex const &lhs, T const &s) { + return complex(real(lhs) * s, imag(lhs) * s); +} + +/// Scalar Multiplication +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator*(T const &s, complex const &rhs) { + return complex(s * real(rhs), s * imag(rhs)); +} + +/// Division +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator/(complex const &lhs, complex const &rhs) { + T d = 
(real(rhs) * (rhs) + imag(rhs) * imag(rhs)); + + return complex((real(lhs) * (rhs) + imag(lhs) * imag(rhs)) / d, + (imag(lhs) * (rhs)-real(lhs) * imag(rhs)) / d); +} + +/// Scalar Division +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator/(complex const &lhs, T const &s) { + return complex(real(lhs) / s, imag(lhs) / s); +} + +/// Scalar divided by complex +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex operator/(T const &s, complex const &rhs) { + T d = (real(rhs) * (rhs) + imag(rhs) * imag(rhs)); + + return complex((s * (rhs)) / d, -(s * imag(rhs)) / d); +} + +/// Addition +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex &operator+=(complex &lhs, complex const &rhs) { + lhs = (lhs + rhs); + return lhs; +} + +/// Subtraction +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex &operator-=(complex &lhs, complex const &rhs) { + lhs = (lhs - rhs); + return lhs; +} + +/// Multiplication +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex &operator*=(complex &lhs, complex const &rhs) { + lhs = (lhs * rhs); + return lhs; +} + +/// Scalar multiplication +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex &operator*=(complex &lhs, T s) { + lhs = (lhs * s); + return lhs; +} + +/// Division +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex 
&operator/=(complex &lhs, complex const &rhs) { + lhs = (lhs / rhs); + return lhs; +} + +// +// Non-member functions defined for complex numbers +// + +/// Returns the magnitude of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE T abs(complex const &z) { + return sqrt(norm(z)); +} + +/// Returns the magnitude of the complex number +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE T arg(complex const &z) { + return atan2(imag(z), real(z)); +} + +/// Returns the squared magnitude +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE T norm(complex const &z) { + return real(z) * real(z) + imag(z) * imag(z); +} + +/// Returns the complex conjugate +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex conj(complex const &z) { + return complex(real(z), -imag(z)); +} + +/// Projects the complex number z onto the Riemann sphere +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex proj(complex const &z) { + T d = real(z) * real(z) + imag(z) * imag(z) + T(1); + return complex((T(2) * real(z)) / d, (T(2) * imag(z)) / d); +} + +/// Returns a complex number with magnitude r and phase theta +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex polar(T const &r, T const &theta = T()) { + return complex(r * cos(theta), r * sin(theta)); +} + +/// Computes the complex exponential of z. 
+#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex exp(complex const &z) { + return complex(real(z) * cos(imag(z)), real(z) * sin(imag(z))); +} + +/// Computes the complex exponential of z. +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex log(complex const &z) { + return complex(log(abs(z)), arg(z)); +} + +/// Computes the complex exponential of z. +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex log10(complex const &z) { + return log(z) / T(log(T(10))); +} + +/// Computes the square root of complex number z +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex sqrt(complex const &z) { + return sqrt(T(2)) / T(2) * + complex(sqrt(sqrt(norm(z)) + real(z)), + (imag(z) < 0 ? T(-1) : T(1)) * sqrt(sqrt(norm(z)) - real(z))); +} + +/// Computes the cosine of complex z. +#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex cos(complex const &z) { + return (exp(z) + exp(-z)) / T(2); +} + +/// Computes the sin of complex z. 
+#pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex with a + // host-only type +template +CUTLASS_HOST_DEVICE complex sin(complex const &z) { + return (exp(-z) - exp(z)) * complex(T(0), T(1) / T(2)); +} + +////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace platform +} // namespace cutlass diff --git a/cutlass/util/cutlass_math.h b/cutlass/util/cutlass_math.h index 0ecdc4382..e3b46ef35 100644 --- a/cutlass/util/cutlass_math.h +++ b/cutlass/util/cutlass_math.h @@ -30,7 +30,7 @@ * \brief Math utilities */ -#include +#include "cutlass/util/platform.h" namespace cutlass { @@ -128,4 +128,38 @@ CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b) { return temp ? (a / temp * b) : 0; } +/** + * log2 computation, what's the + * difference between the below codes and + * log2_up/down codes? + */ +template +CUTLASS_HOST_DEVICE value_t clz(value_t x) { + for (int i = 31; i >= 0; --i) { + if ((1 << i) & x) return 31 - i; + } + return 32; +} + +template +CUTLASS_HOST_DEVICE value_t find_log2(value_t x) { + int a = 31 - clz(x); + a += (x & (x - 1)) != 0; // Round up, add 1 if not a power of 2. + return a; +} + +/****************************************************************************** + * Min/Max + ******************************************************************************/ + +template +struct Min { + static int const kValue = (A < B) ? A : B; +}; + +template +struct Max { + static int const kValue = (A > B) ? 
A : B; +}; + } // namespace cutlass diff --git a/cutlass/gemm/identity_block_swizzle.h b/cutlass/util/numeric_types.h similarity index 79% rename from cutlass/gemm/identity_block_swizzle.h rename to cutlass/util/numeric_types.h index e1bdb2e00..d8094a256 100644 --- a/cutlass/gemm/identity_block_swizzle.h +++ b/cutlass/util/numeric_types.h @@ -1,5 +1,5 @@ /*************************************************************************************************** - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted * provided that the following conditions are met: @@ -22,27 +22,26 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -/*! \file - \brief Defies functors for mapping blockIdx to partitions of the GEMM computation. - - Currently, we only implement an identity mapping. +/*! + \file + \brief */ #pragma once namespace cutlass { -namespace gemm { -//////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////// -struct IdentityBlockSwizzle { - /// Ctor. - CUTLASS_DEVICE IdentityBlockSwizzle() {} +// +// Definitions for 1-bit binary and 4-bit integer types +// - /// Swizzle the block index. 
- CUTLASS_DEVICE dim3 swizzle() { return blockIdx; } -}; +struct bin1_t {}; // 1-bit binary type -//////////////////////////////////////////////////////////////////////////////////////////////////// +struct int4_t {}; // 4-bit signed integer type + +struct uint4_t {}; // 4-bit unsigned integer type + +/////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace gemm } // namespace cutlass diff --git a/cutlass/util/platform.h b/cutlass/util/platform.h index 2a44c10e6..3fd7c897d 100644 --- a/cutlass/util/platform.h +++ b/cutlass/util/platform.h @@ -110,9 +110,17 @@ #include // For integral constants, conditional metaprogramming, and type traits #endif -#include +#include "cutlass/cutlass.h" #endif + +//----------------------------------------------------------------------------- +// OS +//----------------------------------------------------------------------------- +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) +#define CUTLASS_OS_WINDOWS +#endif + /****************************************************************************** * Macros ******************************************************************************/ diff --git a/cutlass/vector.h b/cutlass/vector.h index a66dfdef7..aeababb66 100644 --- a/cutlass/vector.h +++ b/cutlass/vector.h @@ -31,7 +31,8 @@ #include #endif -#include +#include "cutlass/util/numeric_types.h" +#include "cutlass/util/platform.h" namespace cutlass { @@ -80,13 +81,43 @@ union Vector { uint32_t registers[kRegisters]; /// Accessor to the ith lane. - CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; } + CUTLASS_HOST_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; } /// Accessor to the ith lane. 
- CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; } + CUTLASS_HOST_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; } }; //////////////////////////////////////////////////////////////////////////////////////////////////// +template <> +union Vector { + /// The scalar type. + typedef half Scalar; + + /// The number of elements in the vector. + enum { kLanes = 1 }; + /// The size of the vector. + enum { kVectorSize = kLanes * (int)sizeof(Scalar) }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + // Make sure that the vector type makes sense. + static_assert(kVectorSize <= 16, "Vector type is too large"); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The associated array of scalars. + uint16_t scalars[kLanes]; + + /// Accessor to the ith lane. + CUTLASS_HOST_DEVICE Scalar const& operator[](uint32_t i) const { + return reinterpret_cast(scalars[i]); + } + /// Accessor to the ith lane. + CUTLASS_HOST_DEVICE Scalar& operator[](uint32_t i) { + return reinterpret_cast(scalars[i]); + } +}; + #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) template @@ -112,19 +143,124 @@ union Vector { uint32_t registers[kRegisters]; /// Accessor to the ith lane. - CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { + CUTLASS_HOST_DEVICE Scalar const& operator[](uint32_t i) const { return reinterpret_cast(scalars[i]); } /// Accessor to the ith lane. - CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return reinterpret_cast(scalars[i]); } + CUTLASS_HOST_DEVICE Scalar& operator[](uint32_t i) { + return reinterpret_cast(scalars[i]); + } }; #endif //////////////////////////////////////////////////////////////////////////////////////////////////// +/// Vector definition for 1-bit binary datatype +template +union Vector { + /// The scalar type. + typedef bin1_t Scalar; + + /// The number of elements in the vector. 
+ enum { kLanes = kLanes_ }; + /// The size of the vector. + enum { kVectorSize = kLanes / 8 }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + static_assert((kLanes >= 8) && !(kLanes % 8), + "May only construct vectors of bin1_t that are multiples of 8 bits."); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The data in registers. + uint32_t registers[kRegisters]; + + /// Default Constructor + CUTLASS_HOST_DEVICE + Vector() {} + /// Constructor to convert from uint32_t type + CUTLASS_HOST_DEVICE Vector(uint32_t value) { registers[0] = value; } + /// Accessor to the ith lane. + CUTLASS_HOST_DEVICE bool operator[](uint32_t i) const { + return ( (registers[i / 32] & (1 << (i % 32))) != 0 ); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Vector definition for 4-bit signed integer datatype +template +union Vector { + /// The scalar type. + typedef int4_t Scalar; + + /// The number of elements in the vector. + enum { kLanes = kLanes_ }; + /// The size of the vector. + enum { kVectorSize = kLanes / 2 }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + static_assert((kLanes >= 2) && !(kLanes % 2), + "May only construct vectors of int4_t that are multiples of 8 bits."); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The data in registers. + uint32_t registers[kRegisters]; + + /// Default Constructor + CUTLASS_HOST_DEVICE + Vector() {} + /// Constructor to convert from uint32_t type + CUTLASS_HOST_DEVICE Vector(uint32_t value) { registers[0] = value; } + /// Accessor to the ith lane. 
+ CUTLASS_HOST_DEVICE int operator[](uint32_t i) const { + return (registers[i / 8] >> (i % 8 * 4) & 0x0f) + - 16 * (registers[i / 8] >> (i % 8 * 4 + 3) & 0x01); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Vector definition for 4-bit unsigned integer datatype +template +union Vector { + /// The scalar type. + typedef uint4_t Scalar; + + /// The number of elements in the vector. + enum { kLanes = kLanes_ }; + /// The size of the vector. + enum { kVectorSize = kLanes / 2 }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + static_assert((kLanes >= 2) && !(kLanes % 2), + "May only construct vectors of uint4_t that are multiples of 8 bits."); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The data in registers. + uint32_t registers[kRegisters]; + + /// Default Constructor + CUTLASS_HOST_DEVICE + Vector() {} + /// Constructor to convert from uint32_t type + CUTLASS_HOST_DEVICE Vector(uint32_t value) { registers[0] = value; } + /// Accessor to the ith lane. 
+ CUTLASS_HOST_DEVICE int operator[](uint32_t i) const { + return registers[i / 8] >> (i % 8 * 4) & 0x0f; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + template -CUTLASS_DEVICE void make_zero(Scalar_& x) { +CUTLASS_HOST_DEVICE void make_zero(Scalar_& x) { x = Scalar_(0); } @@ -137,15 +273,29 @@ struct Vectorize { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Vectorize { - typedef Element_ Type; +template +struct Vectorize, kLanes_> { + typedef Vector Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Vectorize, kLanes_> { + typedef Vector Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Vectorize, kLanes_> { + typedef Vector Type; }; //////////////////////////////////////////////////////////////////////////////////////////////////// template -CUTLASS_DEVICE void make_zero(Vector& vec) { +CUTLASS_HOST_DEVICE void make_zero(Vector& vec) { for (int i = 0; i < Vector::kRegisters; ++i) { vec.registers[i] = 0; } diff --git a/cutlass/wmma_matrix.h b/cutlass/wmma_matrix.h index c4d8a0b54..61c4ed272 100644 --- a/cutlass/wmma_matrix.h +++ b/cutlass/wmma_matrix.h @@ -28,20 +28,23 @@ #pragma once #if defined(__CUDACC__) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700) - -// Dependent header files should use the following macro to guard all code using -// nvcuda::wmma:: to enable compilation for CUDA Compute Capabilities < sm_70. -// Earlier shader models not support Tensor Cores. 
#define CUTLASS_USE_WMMA_API +#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 750) +#define CUTLASS_USE_SUBBYTE_WMMA +#endif + #include "stdio.h" +#if __CUDACC_VER_MAJOR__ >= 10 +#include +#else #include -#include -#include -#include -#include -#include +#endif +#include "cutlass/fragment.h" +#include "cutlass/matrix_traits.h" +#include "cutlass/shape.h" +#include "cutlass/vector.h" namespace cutlass { @@ -61,6 +64,34 @@ struct WmmaLayout { //////////////////////////////////////////////////////////////////////////////////////////////////// +/// Statically maps cutlass types to nvcuda::wmma datatypes +template +struct WmmaDataType{ + typedef Type_ Type; +}; + +#ifdef CUTLASS_USE_SUBBYTE_WMMA +/// Statically maps cutlass::Vector to nvcuda::wmma::experimental::precision::b1 +template<> +struct WmmaDataType > { + typedef nvcuda::wmma::experimental::precision::b1 Type; +}; + +/// Statically maps cutlass::Vector to nvcuda::wmma::experimental::precision::s4 +template<> +struct WmmaDataType > { + typedef nvcuda::wmma::experimental::precision::s4 Type; +}; + +/// Statically maps cutlass::Vector to nvcuda::wmma::experimental::precision::u4 +template<> +struct WmmaDataType > { + typedef nvcuda::wmma::experimental::precision::u4 Type; +}; +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// Adapter to nvcuda::wmma fragment load and store operations template WmmaShape_::kH, WmmaShape_::kD, /// The scalar. - Scalar_, + typename WmmaDataType::Type, /// The layout. typename WmmaLayout::Layout> { /// This type. @@ -117,7 +148,7 @@ struct WmmaMatrix WmmaShape_::kH, WmmaShape_::kD, /// The scalar. - Scalar_, + typename WmmaDataType::Type, /// The layout. typename WmmaLayout::Layout> { /// This type. 
@@ -188,6 +219,18 @@ struct WmmaMatrix //////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace cutlass +// WmmaMatrix cannot be used in a Union and thus in cannot be used in our Vector implementation. +// The only use of WmmaMatrix in in combination with Vectorize has kLanes == 1. Due to this it is +// safe to keep the Vector->Scalar conversion for WmmaMatrix. +template +struct Vectorize, 1> { + typedef WmmaMatrix Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +} #endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/zip_fragment.h b/cutlass/zip_fragment.h new file mode 100644 index 000000000..37a788614 --- /dev/null +++ b/cutlass/zip_fragment.h @@ -0,0 +1,150 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Models a pair of fragments +*/ +#pragma once + +#include + +#include "cutlass/cutlass.h" +#include "cutlass/shape.h" +#include "cutlass/util/cutlass_math.h" +#include "cutlass/vector.h" + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief A template defining \ref fragment_concept +* @concept{fragment_concept} +*/ +template +struct ZipFragment { + /// First fragment object + typedef First_ First; + + /// Second fragment object + typedef Second_ Second; + + /// This class. + typedef ZipFragment This_; + + // + // Data members + // + + /// First fragment object + First first; + + /// Second fragment object + Second second; + + // + // Methods + // + + /// Default ctor + CUTLASS_DEVICE + ZipFragment() { } + + /// Copy ctor + CUTLASS_DEVICE + ZipFragment(First const &_first, Second const &_second): first(_first), second(_second) { } + + /// Clear a fragment. 
+ CUTLASS_DEVICE void clear() { + first.clear(); + second.clear(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to construct a ZipFragment object +template +CUTLASS_HOST_DEVICE +ZipFragment make_ZipFragment(First const &first, Second const &second) { + return ZipFragment(first, second); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Zips two convert operations +template +struct ZipConvert { + /// First convert operator + typedef First_ First; + + /// Second convert operator + typedef Second_ Second; + + /// Defines the input zip fragment + typedef ZipFragment InputFragment; + + /// Defines the output zip fragment + typedef ZipFragment + OutputFragment; + + // + // + // + + /// First transformer + First first; + + /// Second transformer + Second second; + + // + // + // + + /// Ctor. + CUTLASS_DEVICE ZipConvert() {} + + /// Ctor. + CUTLASS_DEVICE ZipConvert(First const &_first, Second const &_second): first(_first), second(_second) { } + + /// Transform a fragment. 
+ CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) { + first.transform(src.first, dst.first); + second.transform(src.second, dst.second); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to construct a ZipConvert object +template +CUTLASS_HOST_DEVICE +ZipConvert make_ZipConvert(First const &first, Second const &second) { + return ZipConvert(first, second); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/zip_tensor_ref.h b/cutlass/zip_tensor_ref.h new file mode 100644 index 000000000..d2cff9e0c --- /dev/null +++ b/cutlass/zip_tensor_ref.h @@ -0,0 +1,77 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a structure containing a pair of TensorRef-like objects +*/ +#pragma once + +#include "cutlass/coord.h" +#include "cutlass/tensor_ref.h" + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ZipTensorRef { + /// First tensor ref + typedef First_ First; + + /// Second tensor ref + typedef Second_ Second; + + // + // Data members + // + + /// First TensorRef + First first; + + /// Second TensorRef + Second second; + + // + // Methods + // + + CUTLASS_HOST_DEVICE + ZipTensorRef() {} + + CUTLASS_HOST_DEVICE + ZipTensorRef(First const& _first, Second const& _second) : first(_first), second(_second) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Constructs a ZipTensorRef +template +CUTLASS_HOST_DEVICE +ZipTensorRef make_ZipTensorRef(First const &first, Second const &second) { + return ZipTensorRef(first, second); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/zip_tile_iterator.h b/cutlass/zip_tile_iterator.h new file mode 100644 index 000000000..f8ba4eee3 --- /dev/null +++ b/cutlass/zip_tile_iterator.h @@ -0,0 +1,287 @@ 
+/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/*! 
\file + \brief Constructs an iterator that owns two tile iterator instances +*/ + +#pragma once + +#include "cutlass/coord.h" +#include "cutlass/zip_tensor_ref.h" +#include "cutlass/zip_fragment.h" + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Constructs an iterator from a pair of iterators +template +class ZipTileIterator { + public: + /// First iterator type + typedef First_ First; + + /// Second iterator type + typedef Second_ Second; + + /// Params object + struct Params { + /// Parameters of first iterator + typename First::Params first; + + /// Parameters of second iterator + typename Second::Params second; + + /// Constructs a parameters object + CUTLASS_HOST_DEVICE + Params() {} + + /// Constructs a parameters object + CUTLASS_HOST_DEVICE + Params(typename First::Params const &_first, typename Second::Params const &_second) + : first(_first), second(_second) {} + }; + + /// Fragment type + typedef ZipFragment Fragment; + + /// Predicate vector + typedef typename First::PredicateVector PredicateVector; + + /// Index type + typedef typename First::Index Index; + + /// Tensor reference + typedef ZipTensorRef< + typename First::TensorRef, + typename Second::TensorRef> TensorRef; + + // + // Data members + // + + /// First iterator + First first; + + /// Second iterator + Second second; + + // + // Methods + // + + /// Default constructor + CUTLASS_DEVICE + ZipTileIterator() {} + + /// Constructs a zip iterator from params + CUTLASS_DEVICE + ZipTileIterator(Params const &_params, Coord<3> const &threadblock_offset = make_Coord(0, 0, 0)) + : first(_params.first, threadblock_offset), second(_params.second, threadblock_offset) {} + + /// Constructs a zip iterator from iterator instances + CUTLASS_DEVICE + ZipTileIterator(First const &_first, Second const &_second) : first(_first), second(_second) {} + + /// Constructs a zip iterator from iterator instances + CUTLASS_DEVICE + 
ZipTileIterator(TensorRef const &ref) : first(ref.first), second(ref.second) {} + + /// Constructs a zip iterator from iterator instances + CUTLASS_DEVICE + ZipTileIterator(Params const &_params, TensorRef const &ref): + first(_params.first, ref.first), second(_params.second, ref.second) {} + + // + // Predicate initialization + // + + /// Initializes a predicate vector using a RegularTilePredicateFunctor + template < + /// Predicate iterator + typename PredicateIterator> + CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, + Coord<3> const &bounds, + Coord<3> const &block_offset = make_Coord(0, + 0, + 0)) { + first.initialize_predicates(predicate_it, bounds, block_offset); + } + + /// Initializes a predicate vector using an arbitrary predicate functor + template < + /// Predicate iterator + typename PredicateIterator, + /// Functor computing predicates + typename PredicateFunctor> + CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, + PredicateFunctor const &functor, + Coord<3> const &block_offset) { + first.initialize_predicates(predicate_it, functor, block_offset); + } + + // + // No predicates + // + + /// Loads a fragment and increments without predicates + template + CUTLASS_DEVICE void load_post_increment(Fragment &fragment) { + first.load_post_increment(fragment.first); + second.load_post_increment(fragment.second); + } + + /// Loads a fragment and increments without predicates + template + CUTLASS_DEVICE void load_post_increment(Fragment &fragment, + Coord<4> const &offset) { + first.load_post_increment(fragment.first, offset); + second.load_post_increment(fragment.second, offset); + } + + /// Loads a fragment without predicates + template + CUTLASS_DEVICE void load(Fragment &fragment) const { + first.load(fragment.first); + second.load(fragment.second); + } + + /// Loads a fragment without predicates + template + CUTLASS_DEVICE void load(Fragment &fragment, + Coord<4> const &offset) const { + 
first.load(fragment.first, offset); + second.load(fragment.second, offset); + } + + /// Stores a fragment and increments without predicates + template + CUTLASS_DEVICE void store_post_increment(Fragment const &fragment) { + first.store_post_increment(fragment.first); + second.store_post_increment(fragment.second); + } + + /// Stores a fragment and increments without predicates + template + CUTLASS_DEVICE void store_post_increment(Fragment const &fragment, + Coord<4> const &offset) { + first.store_post_increment(fragment.first, offset); + second.store_post_increment(fragment.second, offset); + } + + /// Stores a fragment without predicates + template + CUTLASS_DEVICE void store(Fragment const &fragment) const { + first.store(fragment.first); + second.store(fragment.second); + } + + /// Stores a fragment without predicates + template + CUTLASS_DEVICE void store(Fragment const &fragment, + Coord<4> const &offset) const { + first.store(fragment.first, offset); + second.store(fragment.second, offset); + } + + // + // With predication + // + + /// Loads a fragment and increments, using predicates + template + CUTLASS_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) { + first.load_post_increment(fragment.first, pred_it); + second.load_post_increment(fragment.second, pred_it); + } + + /// Loads a fragment with predicates + template + CUTLASS_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const { + first.load(fragment.first, pred_it); + second.load(fragment.second, pred_it); + } + + /// Loads a fragment and increments, using predicates + template + CUTLASS_DEVICE void store_post_increment(Fragment const &fragment, PredicateIterator pred_it) { + first.store_post_increment(fragment.first, pred_it); + second.store_post_increment(fragment.second, pred_it); + } + + /// Loads a fragment with predicates + template + CUTLASS_DEVICE void store(Fragment const &fragment, PredicateIterator pred_it) const { + first.store(fragment.first, 
pred_it); + second.store(fragment.second, pred_it); + } + + // + // Advances the iterators + // + + /// Increments store iterator to next tile + CUTLASS_DEVICE ZipTileIterator &increment(int count = 1) { + first.increment(count); + second.increment(count); + return *this; + } + + /// Increments to next tile + CUTLASS_DEVICE ZipTileIterator &operator++() { return increment(); } + + CUTLASS_DEVICE ZipTileIterator &operator+=(int count) { return increment(count); } + + /// Adds a vector offset to the underlying iterators + CUTLASS_DEVICE ZipTileIterator &operator+=(Coord<3> const &offset) { + first += offset; + second += offset; + return *this; + } + + /// Increments store iterator to previous tile + CUTLASS_DEVICE ZipTileIterator &decrement(int count = 1) { + first.decrement(count); + second.decrement(count); + return *this; + } + + /// Increments to subsequent tile + CUTLASS_DEVICE ZipTileIterator &operator--() { return decrement(); } + + /// Decrements to previous tile + CUTLASS_DEVICE ZipTileIterator &operator-=(int count) { return decrement(count); } + + /// Adds an offset to both iterators + CUTLASS_DEVICE void add_pointer_offset(Index offset) { + first.add_pointer_offset(offset); + second.add_pointer_offset(offset); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namspace cutlass diff --git a/examples/00_basic_gemm/CMakeLists.txt b/examples/00_basic_gemm/CMakeLists.txt new file mode 100644 index 000000000..144263fff --- /dev/null +++ b/examples/00_basic_gemm/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +set(EXAMPLES_BASIC_CUTLASS_GEMM_SOURCES + basic_gemm.cu +) + +if (NOT CUTLASS_NATIVE_CUDA) + # cuda_add_executable does not take interface include directories into account + # Let's fetch them and pass them to CUDA. + get_target_property(CUTLASS_INCLUDES CUTLASS INTERFACE_INCLUDE_DIRECTORIES) + include_directories("${CUTLASS_INCLUDES}") +endif() + +cutlass_add_executable( + 00_basic_gemm + ${EXAMPLES_BASIC_CUTLASS_GEMM_SOURCES} +) diff --git a/examples/00_basic_gemm/basic_gemm.cu b/examples/00_basic_gemm/basic_gemm.cu new file mode 100644 index 000000000..d6911c1f6 --- /dev/null +++ b/examples/00_basic_gemm/basic_gemm.cu @@ -0,0 +1,492 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/* + This example demonstrates how to call a CUTLASS GEMM kernel and provides a naive reference + matrix multiply kernel to verify its correctness. + + The CUTLASS Gemm template is instantiated in the function CutlassSgemmNN. 
This is kernel computes + the general matrix product (GEMM) using single-precision floating-point arithmetic and assumes + all matrices have column-major layout. + + The threadblock tile size is chosen as 128x128x8 which offers good performance for large matrices. + See the CUTLASS Parallel for All blog post for more exposition on the tunable parameters available + in CUTLASS. + + https://devblogs.nvidia.com/cutlass-linear-algebra-cuda/ + + Aside from defining and launching the SGEMM kernel, this example does not use any other components + or utilities within CUTLASS. Such utilities are demonstrated elsewhere in other examples and are + prevalent in the CUTLASS unit tests. +*/ + +// Standard Library includes +#include +#include +#include + +// +// CUTLASS includes needed for single-precision GEMM kernel +// + +// Defines cutlass::gemm::Gemm, the generic Gemm computation template class. +#include "cutlass/gemm/gemm.h" + +// Defines cutlass::gemm::SgemmTraits, the structural components for single-precision GEMM +#include "cutlass/gemm/sgemm_traits.h" + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// This function defines a CUTLASS GEMM kernel instantiation, constructs its parameters object, +// and launches it on the CUDA device. +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Define a CUTLASS GEMM template and launch a GEMM kernel. +cudaError_t CutlassSgemmNN( + int M, + int N, + int K, + float alpha, + float const *A, + int lda, + float const *B, + int ldb, + float beta, + float *C, + int ldc) { + + // Define type definition for single-precision CUTLASS GEMM with column-major + // input matrices and 128x128x8 threadblock tile size. + // + // Note, GemmTraits<> is a generic template defined for various general matrix product + // computations within CUTLASS. 
It is intended to be maximally flexible, and consequently + // it contains numerous template arguments. + // + // To keep the interface manageable, several helpers are defined for plausible compositions + // including the following example for single-precision GEMM. Typical values are used as + // default template arguments. See `cutlass/gemm/gemm_traits.h` for more details. + // + typedef cutlass::gemm::SgemmTraits< + cutlass::MatrixLayout::kColumnMajor, // layout of A matrix + cutlass::MatrixLayout::kColumnMajor, // layout of B matrix + cutlass::Shape<8, 128, 128> // threadblock tile size + > + GemmTraits; + + // Define a CUTLASS GEMM type from a GemmTraits<> instantiation. + typedef cutlass::gemm::Gemm Gemm; + + // Construct and initialize CUTLASS GEMM parameters object. + // + // One of CUTLASS's design patterns is to define parameters objects that are constructible + // in host code and passed to kernels by value. These may include pointers, strides, scalars, + // and other arguments needed by Gemm and its components. + // + // The benefits of this pattern are (1.) a structured, composable strategy for passing host-constructible + // arguments to kernels and (2.) minimized initialization overhead on kernel entry. + // + typename Gemm::Params params; + + int result = params.initialize( + M, // GEMM M dimension + N, // GEMM N dimension + K, // GEMM K dimension + alpha, // scalar alpha + A, // matrix A operand + lda, + B, // matrix B operand + ldb, + beta, // scalar beta + C, // source matrix C + ldc, + C, // destination matrix C (may be different memory than source C matrix) + ldc + ); + + if (result) { + std::cerr << "Failed to initialize CUTLASS Gemm::Params object." << std::endl; + return cudaErrorInvalidValue; + } + + // Launch the CUTLASS GEMM kernel. + Gemm::launch(params); + + // Return any errors associated with the launch or cudaSuccess if no error. 
+ return cudaGetLastError(); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// The source code after this point in the file is generic CUDA using the CUDA Runtime API +// and simple CUDA kernels to initialize matrices and compute the general matrix product. +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Kernel to initialize a matrix with small integers. +__global__ void InitializeMatrix_kernel( + float *matrix, + int ldm, + int rows, + int columns, + int seed = 0) { + + int i = threadIdx.x + blockIdx.x * blockDim.x; + int j = threadIdx.y + blockIdx.y * blockDim.y; + + if (i < rows && j < columns) { + int offset = i + j * ldm; + + // Generate arbitrary elements. + int const k = 16807; + int const m = 16; + float value = float(((offset + seed) * k % m) - m / 2); + + matrix[offset] = value; + } +} + +/// Simple function to initialize a matrix to arbitrary small integers. +cudaError_t InitializeMatrix(float *matrix, int ldm, int rows, int columns, int seed = 0) { + + dim3 block(16, 16); + dim3 grid( + (rows + block.x - 1) / block.x, + (columns + block.y - 1) / block.y + ); + + InitializeMatrix_kernel<<< grid, block >>>(matrix, ldm, rows, columns, seed); + + return cudaGetLastError(); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Allocates device memory for a matrix then fills with arbitrary small integers. +cudaError_t AllocateMatrix(float **matrix, int ldm, int rows, int columns, int seed = 0) { + cudaError_t result; + + size_t sizeof_matrix = sizeof(float) * ldm * columns; + + // Allocate device memory. + result = cudaMalloc(reinterpret_cast(matrix), sizeof_matrix); + + if (result != cudaSuccess) { + std::cerr << "Failed to allocate matrix: " + << cudaGetErrorString(result) << std::endl; + return result; + } + + // Clear the allocation. 
+ result = cudaMemset(*matrix, 0, sizeof_matrix); + + if (result != cudaSuccess) { + std::cerr << "Failed to clear matrix device memory: " + << cudaGetErrorString(result) << std::endl; + return result; + } + + // Initialize matrix elements to arbitrary small integers. + result = InitializeMatrix(*matrix, ldm, rows, columns, seed); + + if (result != cudaSuccess) { + std::cerr << "Failed to initialize matrix: " + << cudaGetErrorString(result) << std::endl; + return result; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Naive reference GEMM computation. +__global__ void ReferenceGemm_kernel( + int M, + int N, + int K, + float alpha, + float const *A, + int lda, + float const *B, + int ldb, + float beta, + float *C, + int ldc) { + + int i = threadIdx.x + blockIdx.x * blockDim.x; + int j = threadIdx.y + blockIdx.y * blockDim.y; + + if (i < M && j < N) { + float accumulator = 0; + + for (int k = 0; k < K; ++k) { + accumulator += A[i + k * lda] * B[k + j * ldb]; + } + + C[i + j * ldc] = alpha * accumulator + beta * C[i + j * ldc]; + } +} + +/// Reference GEMM computation. +cudaError_t ReferenceGemm( + int M, + int N, + int K, + float alpha, + float const *A, + int lda, + float const *B, + int ldb, + float beta, + float *C, + int ldc) { + + dim3 block(16, 16); + dim3 grid( + (M + block.x - 1) / block.x, + (N + block.y - 1) / block.y + ); + + ReferenceGemm_kernel<<< grid, block >>>(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); + + return cudaGetLastError(); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Allocate several matrices in GPU device memory and call a single-precision +/// CUTLASS GEMM kernel. +cudaError_t TestCutlassGemm(int M, int N, int K, float alpha, float beta) { + cudaError_t result; + + // + // Define several matrices to be used as operands to GEMM kernels. 
+ // + + // Compute leading dimensions for each matrix. + int lda = M; + int ldb = K; + int ldc = M; + + // Compute size in bytes of the C matrix. + size_t sizeof_C = sizeof(float) * ldc * N; + + // Define pointers to matrices in GPU device memory. + float *A; + float *B; + float *C_cutlass; + float *C_reference; + + // + // Allocate matrices in GPU device memory with arbitrary seeds. + // + + result = AllocateMatrix(&A, lda, M, K, 0); + + if (result != cudaSuccess) { + return result; + } + + result = AllocateMatrix(&B, ldb, K, N, 17); + + if (result != cudaSuccess) { + cudaFree(A); + return result; + } + + result = AllocateMatrix(&C_cutlass, ldc, M, N, 101); + + if (result != cudaSuccess) { + cudaFree(A); + cudaFree(B); + return result; + } + + result = AllocateMatrix(&C_reference, ldc, M, N, 101); + + if (result != cudaSuccess) { + cudaFree(A); + cudaFree(B); + cudaFree(C_cutlass); + return result; + } + + result = cudaMemcpy(C_reference, C_cutlass, sizeof_C, cudaMemcpyDeviceToDevice); + + if (result != cudaSuccess) { + std::cerr << "Failed to copy C_cutlass matrix to C_reference: " + << cudaGetErrorString(result) << std::endl; + + cudaFree(C_reference); + cudaFree(C_cutlass); + cudaFree(B); + cudaFree(A); + + return result; + } + + // + // Launch CUTLASS GEMM. + // + + result = CutlassSgemmNN(M, N, K, alpha, A, lda, B, ldb, beta, C_cutlass, ldc); + + if (result != cudaSuccess) { + std::cerr << "CUTLASS GEMM kernel failed: " + << cudaGetErrorString(result) << std::endl; + + cudaFree(C_reference); + cudaFree(C_cutlass); + cudaFree(B); + cudaFree(A); + + return result; + } + + // + // Verify. 
+ // + + // Launch reference GEMM + result = ReferenceGemm(M, N, K, alpha, A, lda, B, ldb, beta, C_reference, ldc); + + if (result != cudaSuccess) { + std::cerr << "Reference GEMM kernel failed: " + << cudaGetErrorString(result) << std::endl; + + cudaFree(C_reference); + cudaFree(C_cutlass); + cudaFree(B); + cudaFree(A); + + return result; + } + + // Copy to host and verify equivalence. + std::vector host_cutlass(ldc * N, 0); + std::vector host_reference(ldc * N, 0); + + result = cudaMemcpy(host_cutlass.data(), C_cutlass, sizeof_C, cudaMemcpyDeviceToHost); + + if (result != cudaSuccess) { + std::cerr << "Failed to copy CUTLASS GEMM results: " + << cudaGetErrorString(result) << std::endl; + + cudaFree(C_reference); + cudaFree(C_cutlass); + cudaFree(B); + cudaFree(A); + + return result; + } + + result = cudaMemcpy(host_reference.data(), C_reference, sizeof_C, cudaMemcpyDeviceToHost); + + if (result != cudaSuccess) { + std::cerr << "Failed to copy Reference GEMM results: " + << cudaGetErrorString(result) << std::endl; + + cudaFree(C_reference); + cudaFree(C_cutlass); + cudaFree(B); + cudaFree(A); + + return result; + } + + // + // Free device memory allocations. + // + + cudaFree(C_reference); + cudaFree(C_cutlass); + cudaFree(B); + cudaFree(A); + + // + // Test for bit equivalence of results. + // + + if (host_cutlass != host_reference) { + std::cerr << "CUTLASS results incorrect." << std::endl; + + return cudaErrorUnknown; + } + + return cudaSuccess; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Entry point to basic_gemm example. +// +// usage: +// +// 00_basic_gemm +// +int main(int argc, const char *arg[]) { + + // + // Parse the command line to obtain GEMM dimensions and scalar values. + // + + // GEMM problem dimensions. 
+ int problem[3] = { 128, 128, 128 }; + + for (int i = 1; i < argc && i < 4; ++i) { + std::stringstream ss(arg[i]); + ss >> problem[i - 1]; + } + + // Scalars used for linear scaling the result of the matrix product. + float scalars[2] = { 1, 0 }; + + for (int i = 4; i < argc && i < 6; ++i) { + std::stringstream ss(arg[i]); + ss >> scalars[i - 4]; + } + + // + // Run the CUTLASS GEMM test. + // + + cudaError_t result = TestCutlassGemm( + problem[0], // GEMM M dimension + problem[1], // GEMM N dimension + problem[2], // GEMM K dimension + scalars[0], // alpha + scalars[1] // beta + ); + + if (result == cudaSuccess) { + std::cout << "Passed." << std::endl; + } + + // Exit. + return result == cudaSuccess ? 0 : -1; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/examples/01_tensor_view/CMakeLists.txt b/examples/01_tensor_view/CMakeLists.txt new file mode 100644 index 000000000..24ab8018a --- /dev/null +++ b/examples/01_tensor_view/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+set(EXAMPLES_TENSOR_VIEW_SOURCES
+  tensor_view.cu
+)
+
+if (NOT CUTLASS_NATIVE_CUDA)
+  # cuda_add_executable does not take interface include directories into account
+  # Let's fetch them and pass them to CUDA.
+  get_target_property(CUTLASS_INCLUDES CUTLASS INTERFACE_INCLUDE_DIRECTORIES)
+  include_directories("${CUTLASS_INCLUDES}")
+endif()
+
+cutlass_add_executable(
+  01_tensor_view
+  ${EXAMPLES_TENSOR_VIEW_SOURCES}
+)
diff --git a/examples/01_tensor_view/tensor_view.cu b/examples/01_tensor_view/tensor_view.cu
new file mode 100644
index 000000000..e885e6eee
--- /dev/null
+++ b/examples/01_tensor_view/tensor_view.cu
@@ -0,0 +1,424 @@
+/***************************************************************************************************
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/* + This example demonstrates operations using TensorRef<> and TensorView<> as well as their explicit + equivalent functionality in CUDA code. + + CUTLASS provides abstractions for interacting with multidimension tensors in device memory. + Consequently, we define a hierarchy of pointer-like types for referencing tensors. + + T * - raw pointer to elements of type T + + cutlass::TensorRef - reference to a tensor of elements of type T and given rank. + Includes a mapping function and associated stride vector for + accessing elements in linear memory. + + cutlass::TensorView: - extends TensorRef<> by adding bounds information. 
This is a + public TensorRef complete mathematical object which may be used as the argument + to CUTLASS functions. + + The above provide an identity maping of a logical index space to linear memory. An element + at logical coordinate X has an offset computed as follows: + + offset = dot(X, stride) + + where dot() computes the inner product of X and a vector of "strides." + + CUTLASS 1.1 introduces a mapping function and an additional 'rank' to offer a flexible way to + map the logical index space of the tensor to memory. The mapping function maps a coordinate + of rank R to an index space of rank S. The linear offset is computed as: + + offset = dot( MapFunc(X), stride ) + + where stride is a vector of rank S. + + + The complete template declaration for cutlass::TensorRef<> is as follows. + + template < + /// Data type of element stored within tensor + typename Storage, + + /// Rank of logical tensor + int Rank, + + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc = IdentityTensorMapFunc, + + /// Rank of internal n-D array + int StorageRank_ = MapFunc::kStorageRank, + + /// Index type used for coordinates + typename Index = int, + + /// Index type used for offsets and pointer differences + typename LongIndex = long long + > + class TensorRef; + + + CUTLASS kernels make extensive use of vectorization of memory accesses for efficiency and + correctness. Consequently, we enforce a constraint on the strides used by mapping functions + such that: + + 1. The "fastest-changing" stride is always 1 thereby mandating that consecutive elements in + that rank are consecutive in linear memory. + + 2. The fastest changing rank is always last in the stride vector and not explicitly stored. + + Thus, the stride vector used by mapping functions has length of one fewer than the rank of the + storage tensor. 
These constraints are consistent with the BLAS interface of passing matrices as + a tuple consisting of a pointer and a "leading dimension." In fact, these are rank=2 tensors + whose fastest changing dimension is 1, and the stride vector is of length 1. + + + A typical mapping function might simply map the rows and columns of a matrix, a rank=2 tensor, + to linear memory such that (1.) elements in the same column are consecutive in memory + (column-major), or (2.) elements in the same row are consecutive (row-major). These can be + accomplished by two different mapping functions whose stride vector is length=2. The first + element is the "leading dimension." + + The following mapping functions demonstrates mappings for these canonical matrix layouts. In + both cases, the logical index space is referenced by coordinates of the form (row, column). + + // cutlass/matrix_traits.h + struct MatrixLayout { + + // + // TensorRefMapFunc definitions for common layouts + // + + /// Mapping function for row-major matrices + struct RowMajor { + + /// Storage rank = 2 implies stride vector: (ldm, 1) + static int const kStorageRank = 2; + + /// Maps (row, col) to (row, col) + CUTLASS_HOST_DEVICE + Coord operator()(Coord<2> const &coord) const { + return coord; + } + }; + + /// Mapping function for column-major matrices + struct ColumnMajor { + + /// Storage rank = 2 implies stride vector: (ldm, 1) + static int const kStorageRank = 2; + + /// Maps (row, col) to (col, row) + CUTLASS_HOST_DEVICE + Coord operator()(Coord<2> const &coord) const { + return make_Coord(coord[1], coord[0]); + } + }; + }; + + + The requirement that the fastest-changing stride always be of unit size need not be a limitation. + To implement "sparse" computations or matrix operations in which matrix elements have arbitrary + stride along both row and column, define a mapping function whose storage rank is 3. This permits + two elements of the stride vector to have a non-unit value. 
The map function defined in + `cutlass::MatrixTraits::ContiguousLayout` is an example. + + ``` + /// Mapping function for scenario in which layout is row-major or column-major but this information + /// is only available at runtime. + struct ContiguousLayout { + /// Arbitrary storage rank + static int const kStorageRank = 3; + + /// Dimension of rows + static int const kRow = 0; + + /// Dimension of columns + static int const kColumn = 1; + + /// Mapping function defined by runtime variable. Returns coordinates in n-D storage array + /// as (matrix row, matrix colum, 0) + CUTLASS_HOST_DEVICE + Coord operator()(MatrixCoord const &coord) const { + return make_Coord(coord.row(), coord.column(), 0); + } + + /// Helper to construct a stride vector based on contiguous matrix layout and leading dimension + CUTLASS_HOST_DEVICE + static Coord stride(MatrixLayout::Kind layout, int ldm) { + if (layout == MatrixLayout::kRowMajor) { + return make_Coord(ldm, 1, 1); + } + return make_Coord(1, ldm, 1); + } + }; + ``` + + cutlass::TensorView<> extends this concept by including a size vector to specify the bounds of + the index space. The value of each coordinate in the size vector defines the half-open range of + indices whose smallest value is zero. 
+*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +// Standard Library includes +#include +#include + +// +// CUTLASS includes +// + +// Defines cutlass::Coord<> +#include "cutlass/coord.h" + +// Defines cutlass::TensorRef<> +#include "cutlass/tensor_ref.h" + +// Defines cutlass::TensorView<> +#include "cutlass/tensor_view.h" + +// Defines cutlass::MatrixLayout +#include "cutlass/matrix_traits.h" + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Column-major matrix access +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Define a rank=2 tensor modeling a column-major matrix +typedef cutlass::TensorView< + int, // storage element is of type int + 2, // tensor has rank=2 logical index space + cutlass::MatrixLayout::ColumnMajor // column-major mapping function +> TensorViewColumnMajor; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Kernel to copy a matrix from raw memory into a cutlass::TensorView +__global__ void MatrixCopyColumnMajor( + TensorViewColumnMajor destination, // destination tensor accessed by TensorView + int const *source, // source matrix accessed using cuBLAS-style pointer + int ldm) { // and leading dimension + + // Compute unique row and column for each thread + int row = threadIdx.x + blockIdx.x * blockDim.x; + int column = threadIdx.y + blockIdx.y * blockDim.y; + + // Define a coordinate based on the thread's row and column + cutlass::Coord<2> coord = cutlass::make_Coord(row, column); + + // Bounds test + if (coord < destination.size()) { + + // Access the element + destination.at(coord) = source[row + column * ldm]; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Launches kernel MatrixCopyColumnMajor() +cudaError_t 
TestMatrixCopyColumnMajor() { + cudaError_t result; + + int const M = 32; // number of rows + int const N = 16; // number of columns + + int const ldm = 40; // matrix leading dimension + + // + // Allocate source and destination matrices + // + + int *Destination; + int *Source; + + int const matrix_capacity = ldm * N; // number of elements in memory needed to store matrix + size_t const sizeof_matrix = sizeof(int) * matrix_capacity; // size of matrix in bytes + + // Allocate destination and source matrices + result = cudaMalloc((void **)&Destination, sizeof_matrix); + if (result != cudaSuccess) { + std::cerr << "Failed to allocate destination matrix on device: " << cudaGetErrorString(result) << std::endl; + return result; + } + + result = cudaMalloc((void **)&Source, sizeof_matrix); + if (result != cudaSuccess) { + cudaFree(Destination); + std::cerr << "Failed to allocate source matrix on device:" << cudaGetErrorString(result) << std::endl; + return result; + } + + // Clear destination matrix in device memory + result = cudaMemset(Destination, 0, sizeof_matrix); + if (result != cudaSuccess) { + cudaFree(Destination); + cudaFree(Source); + std::cerr << "Failed to clear destination matrix: " << cudaGetErrorString(result) << std::endl; + return result; + } + + // + // Initialize matrix + // + + std::vector source_host(matrix_capacity, 0); + + // Procedurally generate input results using several arbitrary constants. 
+ int const magic_row_stride = 2; + int const magic_column_stride = 3; + + for (int j = 0; j < N; ++j) { + for (int i = 0; i < M; ++i) { + source_host.at(i + j * ldm) = i * magic_row_stride + j * magic_column_stride; + } + } + + // Copy to device memory + result = cudaMemcpy(Source, source_host.data(), sizeof_matrix, cudaMemcpyHostToDevice); + if (result != cudaSuccess) { + cudaFree(Destination); + cudaFree(Source); + std::cerr << "Failed to copy from host to source matrix: " << cudaGetErrorString(result) << std::endl; + return result; + } + + // + // Define a TensorView<> pointing to the destination matrix + // + TensorViewColumnMajor destination_view_device( + Destination, // pointer to base of matrix in device memory + cutlass::make_Coord(ldm, 1), // stride vector + cutlass::make_Coord(M, N) // bounds of matrix + ); + + // + // Launch kernel to copy matrix + // + + dim3 block(16, 16); + dim3 grid((M + block.x - 1) / block.x, (N + block.y - 1) / block.y); + + MatrixCopyColumnMajor<<< grid, block >>>(destination_view_device, Source, ldm); + + result = cudaGetLastError(); + if (result != cudaSuccess) { + std::cerr << "Kernel MatrixCopyColumnMajor() failed: " + << cudaGetErrorString(result) << std::endl; + + cudaFree(Destination); + cudaFree(Source); + + return result; + } + + // + // Copy results to host memory + // + + std::vector dest_host(matrix_capacity, 0); + + result = cudaMemcpy(dest_host.data(), Destination, sizeof_matrix, cudaMemcpyDeviceToHost); + + if (result != cudaSuccess) { + std::cerr << "Failed to copy destination matrix to host memory: " + << cudaGetErrorString(result) << std::endl; + + cudaFree(Destination); + cudaFree(Source); + + return result; + } + + // + // Verify result + // + + // Define a TensorView for use in accessing host memory + TensorViewColumnMajor destination_view_host( + dest_host.data(), // pointer to base of matrix in host memory + cutlass::make_Coord(ldm, 1), // stride vector + cutlass::make_Coord(M, N) // bounds of matrix + ); 
+
+  // Verify against procedurally computed results
+  for (int j = 0; j < N; ++j) {
+    for (int i = 0; i < M; ++i) {
+
+      // computed result
+      int expected = i * magic_row_stride + j * magic_column_stride;
+
+      // access data by computing explicit offsets
+      int got_explicit = dest_host.at(i + j * ldm);
+
+      // access data in host memory through a TensorView
+      int got_view = destination_view_host.at(cutlass::make_Coord(i, j));
+
+      if (got_explicit != expected) {
+
+        std::cerr << "Error at element (" << i << ", " << j
+          << ") accessed through explicitly computed offset - expected: " << expected
+          << ", got: " << got_explicit << std::endl;
+
+        return cudaErrorUnknown;
+      }
+
+      if (got_view != expected) {
+
+        std::cerr << "Error at element (" << i << ", " << j
+          << ") accessed through TensorView<> on the host - expected: " << expected
+          << ", got: " << got_view << std::endl;
+
+        return cudaErrorUnknown;
+      }
+    }
+  }
+
+  return cudaSuccess;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Entry point for tensor_view example.
+//
+// usage:
+//
+//   01_tensor_view
+//
+int main() {
+
+  cudaError_t result = TestMatrixCopyColumnMajor();
+
+  if (result == cudaSuccess) {
+    std::cout << "Passed" << std::endl;
+  }
+
+  return (result == cudaSuccess ? 0 : -1);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/examples/02_cutlass_utilities/CMakeLists.txt b/examples/02_cutlass_utilities/CMakeLists.txt
new file mode 100644
index 000000000..f59281e05
--- /dev/null
+++ b/examples/02_cutlass_utilities/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +set(EXAMPLES_CUTLASS_UTILITIES_SOURCES + cutlass_utilities.cu +) + +if (NOT CUTLASS_NATIVE_CUDA) + # cuda_add_executable does not take interface include directories into account + # Let's fetch them and pass them to CUDA. 
+ get_target_property(CUTLASS_INCLUDES CUTLASS INTERFACE_INCLUDE_DIRECTORIES) + include_directories("${CUTLASS_INCLUDES}") +endif() + +cutlass_add_executable( + 02_cutlass_utilities + ${EXAMPLES_CUTLASS_UTILITIES_SOURCES} +) diff --git a/examples/02_cutlass_utilities/cutlass_utilities.cu b/examples/02_cutlass_utilities/cutlass_utilities.cu new file mode 100644 index 000000000..296699325 --- /dev/null +++ b/examples/02_cutlass_utilities/cutlass_utilities.cu @@ -0,0 +1,359 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/* + This example demonstrates several CUTLASS utilities in the context of a mixed-precision + floating-point matrix product computation. + + These utilities are intended to be useful supporting components for managing tensor and matrix + memory allocations, initializing and comparing results, and computing reference output. + + CUTLASS utilities are defined in the directory `tools/util`, and definitions appear + namespace `cutlass::` or an inner namespace therein. Operations in `cutlass::reference::` have + both host-side and device-side implementations, and the choice to use device-side initialization + and host-side verification in this example was arbitrary. + + + cutlass::half_t + + This is a host-only implementation of a half-precision floating-point type. It requires no + specialized hardware support from the CPU and emulates arithmetic operations. Device-side code + should use CUDA's `half` type. + + + cutlass::HostMatrix<> + + This template class simplifies the creation of a rank=2 tensor with either a column-major or + row-major layout in memory. + + This class offers methods device_view() and host_view() to provide TensorView objects for + device- and host-side memory allocations. 
+ + + cutlass::reference::device::TensorInitialize() + + This template function initializes the elements of a tensor according to either a procedural + definition or a random distribution. The function in namespace `cutlass::reference::device::` + uses a CUDA kernel to perform this initialization, relying on CURAND to compute random numbers. + + + cutlass::reference::host::Gemm() + + This template function computes the general matrix product. This template supports unique + data types for each matrix operand, the internal accumulation type, and the scalar parameters + alpha and beta. + + + cutlass::reference::host::TensorEquals() + + Compares two tensors of identical rank and returns true if values are bit equivalent. + +*/ + +// Standard Library includes +#include +#include +#include + +// CUTLASS includes needed for mixed-precision GEMM kernel +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/fp16_sgemm_traits.h" + +// +// CUTLASS utility includes +// + +// Defines operator<<() to write TensorView objects to std::ostream +#include "tools/util/tensor_view_io.h" + +// Defines cutlass::HostMatrix<> +#include "tools/util/host_matrix.h" + +// Defines cutlass::half_t +#include "tools/util/half.h" + +// Defines cutlass::reference::device::TensorInitialize() +#include "tools/util/reference/device/tensor_elementwise.h" + +// Defines cutlass::reference::host::TensorEquals() +#include "tools/util/reference/host/tensor_elementwise.h" + +// Defines cutlass::reference::host::Gemm() +#include "tools/util/reference/host/gemm.h" + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Define a CUTLASS GEMM template and launch a GEMM kernel. +cudaError_t Cutlass_FP16_SgemmNN( + int M, + int N, + int K, + cutlass::half_t alpha, + half const *A, + int lda, + half const *B, + int ldb, + cutlass::half_t beta, + half *C, + int ldc) { + + // Define a CUTLASS Gemm using mixed-precision floating-point. 
+ // + // A, B, C, D are half-precision. Internal accumulation is in single-precision. + // + // Note, we use CUDA's `half` type for device-side code including CUTLASS GEMM kernels. + // + typedef cutlass::gemm::Fp16SgemmSgemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::Shape<16, 128, 128>, + half, // A type + half, // B type + half, // C type + half, // D type + half // Scalar type: alpha, beta + > + GemmTraits; + + // Define a CUTLASS GEMM object. + typedef cutlass::gemm::Gemm Gemm; + + // Construct and initialize CUTLASS GEMM parameters object. + typename Gemm::Params params; + + int result = params.initialize( + M, // GEMM M dimension + N, // GEMM N dimension + K, // GEMM K dimension + half(float(alpha)), // scalar alpha - This is a legal conversion from cutlass::half_t to CUDA's half. + A, // matrix A operand + lda, + B, // matrix B operand + ldb, + half(float(beta)), // scalar beta - This is a legal conversion from cutlass::half_t to CUDA's half. + C, // source matrix C + ldc, + C, // destination matrix C (may be different memory than source C matrix) + ldc + ); + + if (result) { + std::cerr << "Failed to initialize CUTLASS Gemm::Params object." << std::endl; + return cudaErrorInvalidValue; + } + + // Launch the CUTLASS GEMM kernel. + Gemm::launch(params); + + // Return any errors associated with the launch or cudaSuccess if no error. + return cudaGetLastError(); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Allocate several matrices in GPU device memory and call a single-precision +/// CUTLASS GEMM kernel. +cudaError_t TestCutlassGemm(int M, int N, int K, cutlass::half_t alpha, cutlass::half_t beta) { + cudaError_t result; + + // + // Construct cutlass::HostMatrix<> using the half-precision host-side type. + // + // cutlass::HostMatrix<> allocates memory on both the host and device corresponding to rank=2 + // tensors in column-major layout. 
Explicit synchronization methods are offered to copy the + // tensor to the device or to the host. + // + + // M-by-K matrix of cutlass::half_t + cutlass::HostMatrix A(cutlass::MatrixCoord(M, K)); + + // K-by-N matrix of cutlass::half_t + cutlass::HostMatrix B(cutlass::MatrixCoord(K, N)); + + // M-by-N matrix of cutlass::half_t + cutlass::HostMatrix C_cutlass(cutlass::MatrixCoord(M, N)); + + // M-by-N matrix of cutlass::half_t + cutlass::HostMatrix C_reference(cutlass::MatrixCoord(M, N)); + + // + // Initialize matrices with small, random integers. + // + + cutlass::Distribution dist; + + // Uniform random distribution from -4 .. 4. Values are truncated to integers. + dist.set_uniform(-4, 4); + + // Arbitrary RNG seed value. Hard-coded for deterministic results. + int seed = 2080; + + cutlass::reference::device::TensorInitialize( + A.device_view(), // concept: TensorView + seed, + dist); + + cutlass::reference::device::TensorInitialize( + B.device_view(), // concept: TensorView + seed * 2, + dist); + cutlass::reference::device::TensorInitialize( + C_cutlass.device_view(), // concept: TensorView + seed * 3, + dist); + + // Copy C_cutlass into C_reference so the GEMM is correct when beta != 0. + cutlass::reference::device::TensorFill(C_reference.device_view(), C_cutlass.device_view()); + + // Copy the device-side view into host memory + C_reference.sync_host(); + + // + // Launch the CUTLASS GEMM kernel + // + + result = Cutlass_FP16_SgemmNN( + M, + N, + K, + alpha, + A.device_data(), + A.leading_dim(), + B.device_data(), + B.leading_dim(), + beta, + C_cutlass.device_data(), + C_cutlass.leading_dim() + ); + + if (result != cudaSuccess) { + return result; + } + + // + // Verify the result using a host-side reference + // + + // A and B were initialized using device-side procedures. The intent of this example is to + // use the host-side reference GEMM, so we must perform a device-to-host copy. 
+  A.sync_host();
+  B.sync_host();
+
+  // Copy CUTLASS's GEMM results into host memory.
+  C_cutlass.sync_host();
+
+  // Compute the reference result using the host-side GEMM reference implementation.
+  cutlass::reference::host::Gemm(
+    cutlass::gemm::GemmCoord(K, N, M),  // problem size (type: cutlass::gemm::GemmCoord)
+    alpha,                              // alpha (type: cutlass::half_t)
+    A.host_ref(),                       // A (concept: TensorRef)
+    B.host_ref(),                       // B (concept: TensorRef)
+    beta,                               // beta (type: cutlass::half_t)
+    C_reference.host_ref(),             // C (concept: TensorRef)
+    float(0)                            // Accumulator initial value passed as argument to deduce
+  );                                    // internal accumulation data type as float.
+
+  // Compare reference to computed results.
+  if (!cutlass::reference::host::TensorEquals(C_reference.host_view(), C_cutlass.host_view())) {
+
+    std::cerr << "Error - CUTLASS mixed-precision GEMM kernel differs from reference." << std::endl;
+
+    //
+    // On error, print C_cutlass and C_reference to std::cerr.
+    //
+    // Note, these are matrices of half-precision elements stored in host memory as
+    // arrays of type cutlass::half_t.
+    //
+
+    // Result of CUTLASS mixed-precision GEMM kernel
+    std::cerr << "CUTLASS:\n" << C_cutlass << std::endl;
+
+    // Result of reference computation
+    std::cerr << "Reference:\n" << C_reference << std::endl;
+
+    // Return error code.
+    return cudaErrorUnknown;
+  }
+
+  // Passed error check
+  return cudaSuccess;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Entry point to cutlass_utilities example.
+//
+// usage:
+//
+//   02_cutlass_utilities
+//
+int main(int argc, const char *arg[]) {
+
+  //
+  // Parse the command line to obtain GEMM dimensions and scalar values.
+  //
+
+  // GEMM problem dimensions:
+  int problem[3] = { 128, 128, 128 };
+
+  for (int i = 1; i < argc && i < 4; ++i) {
+    std::stringstream ss(arg[i]);
+    ss >> problem[i - 1];
+  }
+
+  // Linear scale factors in GEMM.
Note, these are half-precision values stored as + // cutlass::half_t. + // + // Values outside the range of IEEE FP16 will overflow to infinity or underflow to zero. + // + cutlass::half_t scalars[2] = { 1, 0 }; + + for (int i = 4; i < argc && i < 6; ++i) { + std::stringstream ss(arg[i]); + + ss >> scalars[i - 4]; // lexical cast to cutlass::half_t + } + + // + // Run the CUTLASS GEMM test. + // + + cudaError_t result = TestCutlassGemm( + problem[0], // GEMM M dimension + problem[1], // GEMM N dimension + problem[2], // GEMM K dimension + scalars[0], // alpha + scalars[1] // beta + ); + + if (result == cudaSuccess) { + std::cout << "Passed." << std::endl; + } + + // Exit. + return result == cudaSuccess ? 0 : -1; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/examples/03_strided_batched_gemm/CMakeLists.txt b/examples/03_strided_batched_gemm/CMakeLists.txt new file mode 100644 index 000000000..564bc6310 --- /dev/null +++ b/examples/03_strided_batched_gemm/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +set(EXAMPLES_STRIDED_BATCHED_GEMM_SOURCES + strided_batched_gemm.cu +) + +if (NOT CUTLASS_NATIVE_CUDA) + # cuda_add_executable does not take interface include directories into account + # Let's fetch them and pass them to CUDA. + get_target_property(CUTLASS_INCLUDES CUTLASS INTERFACE_INCLUDE_DIRECTORIES) + include_directories("${CUTLASS_INCLUDES}") +endif() + +cutlass_add_executable( + 03_strided_batched_gemm + ${EXAMPLES_STRIDED_BATCHED_GEMM_SOURCES} +) diff --git a/examples/03_strided_batched_gemm/strided_batched_gemm.cu b/examples/03_strided_batched_gemm/strided_batched_gemm.cu new file mode 100644 index 000000000..e7d387b6c --- /dev/null +++ b/examples/03_strided_batched_gemm/strided_batched_gemm.cu @@ -0,0 +1,349 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#include +#include +#include "cutlass/cutlass.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" + +/* +This example demonstrates how to use cutlass to compute a batched strided gemm. 
+In this example, both A and B matrix are non-transpose and column major matrix +batched_C = batched_A x batched_B +As an example, matrix C can be seen as +----------------------------------------------------------- +(0,0,0) | (0,0,1) | (0,0,2) | (1,0,0) | (1,0,1) | (1,0,2) | +----------------------------------------------------------- +(0,1,0) | (0,1,1) | (0,1,2) | (1,1,0) | (1,1,1) | (1,1,2) | +----------------------------------------------------------- +(0,2,0) | (0,2,1) | (0,2,2) | (1,2,0) | (1,2,1) | (1,2,2) | +----------------------------------------------------------- +(0,3,0) | (0,3,1) | (0,3,2) | (1,3,0) | (1,3,1) | (1,3,2) | +----------------------------------------------------------- +(0,4,0) | (0,4,1) | (0,4,2) | (1,4,0) | (1,4,1) | (1,4,2) | +----------------------------------------------------------- +(0,5,0) | (0,5,1) | (0,5,2) | (1,5,0) | (1,5,1) | (1,5,2) | +----------------------------------------------------------- + batch 0 | batch 1 +where we denote each element with (batch_idx, row_idx, column_idx) +In this example, batch size is 2, M is 6 and N is 3 +The stride (batch_stride_C) between the first element of two batches is ldc * n + +matrix A can be seen as +--------------------------------------- +(0,0,0) | (0,0,1) | (1,0,0) | (1,0,1) | +--------------------------------------- +(0,1,0) | (0,1,1) | (1,1,0) | (1,1,1) | +--------------------------------------- +(0,2,0) | (0,2,1) | (1,2,0) | (1,2,1) | +--------------------------------------- +(0,3,0) | (0,3,1) | (1,3,0) | (1,3,1) | +--------------------------------------- +(0,4,0) | (0,4,1) | (1,4,0) | (1,4,1) | +--------------------------------------- +(0,5,0) | (0,5,1) | (1,5,0) | (1,5,1) | +--------------------------------------- + batch 0 | batch 1 +, where batch size is 2, M is 6 and K is 2 +The stride (batch_stride_B) between the first element of two batches is lda * k + +matrix B can be seen as +----------------------------- +(0,0,0) | (0,0,1) | (0,0,2) | +----------------------------- 
batch 0 +(0,1,0) | (0,1,1) | (0,1,2) | +------------------------------------- +(1,0,0) | (1,0,1) | (1,0,2) | +----------------------------- batch 1 +(1,1,0) | (1,1,1) | (1,1,2) | +----------------------------- +, where the batch size is 2, N is 3 and K is 2 +The stride (batch_stride_C) between the first element of two batches is k + + +*/ + +cudaError_t cutlass_strided_batched_sgemm(float const *A, + int lda, + long long int batch_stride_A, + float const *B, + int ldb, + long long int batch_stride_B, + float *C, + int ldc, + long long int batch_stride_C, + float alpha, + float beta, + int m, + int n, + int k, + int batch_count) { + // create a cutlass traits + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + + // create a CUTLASS GEMM object. + typedef cutlass::gemm::Gemm Gemm; + + // Construct and initialize CUTLASS GEMM parameters object. + typename Gemm::Params params; + + int result = params.initialize( + m, // M dimension for each batch + n, // N dimension for each batch + k, // K dimension for each batch + alpha, // scalar alpha + A, + lda, + batch_stride_A, // distance in memory between the first element of neighboring batch + B, + ldb, + batch_stride_B, // distance in memory between the first element of neighboring batch + beta, // scalar beta + C, // source matrix C + ldc, + batch_stride_C, // distance in memory between the first element of neighboring batch + C, // destination matrix C (may be different memory than source C matrix) + ldc, + batch_stride_C, // distance in memory between the first element of neighboring batch + batch_count + ); + + if (result != 0) { + std::cerr << "Failed to initialize CUTLASS Gemm::Params object." << std::endl; + return cudaErrorInvalidValue; + } + + // Launch the CUTLASS GEMM kernel. 
+ Gemm::launch(params); + result = cudaDeviceSynchronize(); + if (result != cudaSuccess) { + std::cerr << "kernel launch result = " << result << std::endl; + } + return cudaGetLastError(); +} + +template +cudaError_t strided_batched_gemm_nn_reference(std::vector const &A, + int lda, + long long int batch_stride_A, + std::vector const &B, + int ldb, + long long int batch_stride_B, + std::vector &C, + int ldc, + long long int batch_stride_C, + T alpha, + T beta, + int m, + int n, + int k, + int batch_count) { + /* + strided batched gemm NN + */ + + cudaError_t result = cudaSuccess; + + if (A.size() < lda * k * batch_count) { + std::cout << "the size of A is too small" << std::endl; + return cudaErrorInvalidValue; + } + if (B.size() < ldb * n) { + std::cout << "the size of B is too small" << std::endl; + return cudaErrorInvalidValue; + } + if (C.size() < ldc * n * batch_count) { + std::cout << "the size of C is too small" << std::endl; + return cudaErrorInvalidValue; + } + + for (int batch_idx = 0; batch_idx < batch_count; batch_idx++) { + for (int n_idx = 0; n_idx < n; n_idx++) { + for (int m_idx = 0; m_idx < m; m_idx++) { + T accum = beta * C[batch_idx * batch_stride_C + n_idx * ldc + m_idx]; + for (int k_idx = 0; k_idx < k; k_idx++) { + accum += alpha + * A[batch_idx * batch_stride_A + k_idx * lda + m_idx] + * B[batch_idx * batch_stride_B + n_idx * ldb + k_idx]; + } + C[batch_idx * batch_stride_C + n_idx * ldc + m_idx] = accum; + } + } + } + + return result; +} + +int main() { + int const m = 16; + int const n = 24; + int const k = 8; + int const batch_count = 3; + + // A, B are non-transpose, column major + int const lda = m; + int const ldb = k * batch_count; + int const ldc = m; + + int const count_A = batch_count * lda * k; + int const count_B = ldb * n; + int const count_C = batch_count * ldc * n; + + // the memory is batched along K dimension + long long int batch_stride_A = static_cast(lda) * static_cast(k); + long long int batch_stride_B = static_cast(k); + 
long long int batch_stride_C = static_cast(ldc) * static_cast(n); + + // alpha and beta + float alpha = 1.0f; + float beta = 2.0f; + + cudaError_t result = cudaSuccess; + + // allocate the host memory + std::vector host_A(count_A); + std::vector host_B(count_B); + std::vector host_C(count_C); + std::vector result_C(count_C); + + // allocate the device memory + float *A; + float *B; + float *C; + + result = cudaMalloc(&A, count_A * sizeof(float)); + if (result != cudaSuccess) { + std::cerr << "cudaMalloc result = " << result << std::endl; + return result; + } + result = cudaMalloc(&B, count_B * sizeof(float)); + if (result != cudaSuccess) { + std::cerr << "cudaMalloc result = " << result << std::endl; + return result; + } + result = cudaMalloc(&C, count_C * sizeof(float)); + if (result != cudaSuccess) { + std::cerr << "cudaMalloc result = " << result << std::endl; + return result; + } + + // fill A + for (int b_idx = 0; b_idx < batch_count; b_idx++) { + for (int col_idx = 0; col_idx < k; col_idx++) { + for (int row_idx = 0; row_idx < m; row_idx++) { + host_A[row_idx + col_idx * lda + b_idx * lda * k] = static_cast(row_idx + col_idx * lda + b_idx * lda * k); + } + } + } + // fill B + for (int b_idx = 0; b_idx < batch_count; b_idx++) { + for (int col_idx = 0; col_idx < n; col_idx++) { + for (int row_idx = 0; row_idx < k; row_idx++) { + host_B[row_idx + col_idx * ldb + b_idx * k] = static_cast(n + k * ldb + batch_count * k) - static_cast(row_idx + col_idx * ldb + b_idx * k); + } + } + } + // fill C + for (int b_idx = 0; b_idx < batch_count; b_idx++) { + for (int col_idx = 0; col_idx < n; col_idx++) { + for (int row_idx = 0; row_idx < m; row_idx++) { + host_C[row_idx + col_idx * ldc + b_idx * ldc * n] = 1.f; + } + } + } + + // ref memory + std::vector ref_A(host_A); + std::vector ref_B(host_B); + std::vector ref_C(host_C); + // copy host memory to device + result = cudaMemcpy(A, host_A.data(), count_A * sizeof(float), cudaMemcpyHostToDevice); + if (result != 
cudaSuccess) { + std::cerr << "cudaMemcpy result = " << result << std::endl; + return result; + } + result = cudaMemcpy(B, host_B.data(), count_B * sizeof(float), cudaMemcpyHostToDevice); + if (result != cudaSuccess) { + std::cerr << "cudaMemcpy result = " << result << std::endl; + return result; + } + result = cudaMemcpy(C, host_C.data(), count_C * sizeof(float), cudaMemcpyHostToDevice); + if (result != cudaSuccess) { + std::cerr << "cudaMemcpy result = " << result << std::endl; + return result; + } + + // run cutlass + result = cutlass_strided_batched_sgemm(A, lda, batch_stride_A, B, ldb, batch_stride_B, C, ldc, batch_stride_C, + alpha, beta, m, n, k, batch_count); + if (result != cudaSuccess) + return result; + + // copy device memory to host + result = cudaMemcpy(result_C.data(), C, count_C * sizeof(float), cudaMemcpyDeviceToHost); + if (result != cudaSuccess) { + std::cerr << "cudaMemcpy result = " << result << std::endl; + return result; + } + + //compare with reference code + result = strided_batched_gemm_nn_reference(ref_A, lda, batch_stride_A, ref_B, ldb, batch_stride_B, ref_C, ldc, batch_stride_C, + alpha, beta, m, n, k, batch_count); + if (result != 0) + return result; + + if (ref_C != result_C) { + std::cout << "CUTLASS strided batched gemm does not run correctly" << std::endl; + return cudaErrorUnknown; + } + + // free memory + result = cudaFree(A); + if (result != cudaSuccess) { + std::cerr << "cudaFree result = " << result << std::endl; + return result; + } + result = cudaFree(B); + if (result != cudaSuccess) { + std::cerr << "cudaFree result = " << result << std::endl; + return result; + } + result = cudaFree(C); + if (result != cudaSuccess) { + std::cerr << "cudaFree result = " << result << std::endl; + return result; + } + + + if (result == cudaSuccess) { + std::cout << "Passed." << std::endl; + } + + // Exit. + return result == cudaSuccess ? 
0 : -1; +} diff --git a/examples/04_tile_iterator/CMakeLists.txt b/examples/04_tile_iterator/CMakeLists.txt new file mode 100644 index 000000000..0e74d12db --- /dev/null +++ b/examples/04_tile_iterator/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +set(EXAMPLES_BASIC_CUTLASS_GEMM_SOURCES + tile_iterator.cu +) + +if (NOT CUTLASS_NATIVE_CUDA) + # cuda_add_executable does not take interface include directories into account + # Let's fetch them and pass them to CUDA. 
+ get_target_property(CUTLASS_INCLUDES CUTLASS INTERFACE_INCLUDE_DIRECTORIES) + include_directories("${CUTLASS_INCLUDES}") +endif() + +cutlass_add_executable( + 04_tile_iterator + ${EXAMPLES_BASIC_CUTLASS_GEMM_SOURCES} +) diff --git a/examples/04_tile_iterator/tile_iterator.cu b/examples/04_tile_iterator/tile_iterator.cu new file mode 100644 index 000000000..40d5e5519 --- /dev/null +++ b/examples/04_tile_iterator/tile_iterator.cu @@ -0,0 +1,248 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/* + This example demonstrates how to use the TileIterator in CUTLASS to load data from addressable + memory, and store it back into addressable memory. + + TileIterator is a core concept in CUTLASS that enables efficient loading and storing of data from + and to addressable memory. The TileIterator accepts a TileTraits type, which defines the shape of a + tile and the distribution of accesses by individual entities, either threads or others. + + In this example, a LoadTileIterator is used to load elements from a tile in global memory, stored in + column-major layout, into a fragment, and a corresponding StoreTileIterator is used to store the + elements back into global memory (in the same column-major layout). + + https://devblogs.nvidia.com/cutlass-linear-algebra-cuda/ + + This example uses CUTLASS utilities to ease the matrix operations. 
+*/ + +// Standard Library includes +#include +#include +#include + +// CUTLASS includes +#include "cutlass/tile_iterator.h" +#include "cutlass/tile_traits_standard.h" + +// +// CUTLASS utility includes +// + +// Defines operator<<() to write TensorView objects to std::ostream +#include "tools/util/tensor_view_io.h" + +// Defines cutlass::HostMatrix<> +#include "tools/util/host_matrix.h" + +// Defines cutlass::reference::device::TensorInitialize() +#include "tools/util/reference/device/tensor_elementwise.h" + +// Defines cutlass::reference::host::TensorEquals() +#include "tools/util/reference/host/tensor_elementwise.h" + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// This function defines load and store tile iterators to load and store a M-by-K tile, in +// column-major layout, from and back into global memory. +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +template +__global__ void cutlass_tile_iterator_load_store_global( + float const *input, + float *output, + int M, + int K) { + + // Define a tile load iterator + typedef cutlass::TileLoadIterator< + Traits, // the Traits type, defines shape/distribution of accesses + float, // elements are of type float + cutlass::IteratorAdvance::kH, // post-increment accesses advance in strided (as opposed to + // contiguous dimension + cutlass::MemorySpace::kGlobal // iterator loads from global memory + > TileLoadIterator; + + // Defines a tile store iterator + typedef cutlass::TileStoreIterator< + Traits, // the Traits type, defines shape/distribution of accesses + float, // elements are of type float + cutlass::IteratorAdvance::kH, // post-increment accesses advance in strided (as opposed to + // contiguous) dimension + cutlass::MemorySpace::kGlobal // iterator stores into global memory + > TileStoreIterator; + + // Defines a predicate vector for managing statically sized vector of boolean predicates + 
typedef typename TileLoadIterator::PredicateVector PredicateVector; + + // The parameters specified to the iterators. These include the pointer to the source of + // addressable memory, and the strides and increments for each of the tile's dimensions + typename TileLoadIterator::Params load_params; + typename TileStoreIterator::Params store_params; + + // Initializing the parameters for both of the iterators. The TileLoadIterator accesses the + // input matrix and TileStoreIterator accesses the output matrix. The strides are set + // identically since the data is being stored in the same way as it is loaded (column-major + // mapping). + load_params.initialize(input, M*K, M, 1); + store_params.initialize(output, M*K, M, 1); + + // Constructing the tile load and store iterators, and the predicates vector + TileLoadIterator load_iterator(load_params); + TileStoreIterator store_iterator(store_params); + PredicateVector predicates; + + // Initializing the predicates with bounds set to <1, K, M>. This protects out-of-bounds loads. + load_iterator.initialize_predicates(predicates.begin(), cutlass::make_Coord(1, K, M)); + + // The fragment in which the elements are loaded into and stored from. + typename TileLoadIterator::Fragment fragment; + + // Loading a tile into a fragment and advancing to the next tile's position + load_iterator.load_post_increment(fragment, predicates.begin()); + // Storing a tile from fragment and advancing to the next tile's position + store_iterator.store_post_increment(fragment); +} + + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +// Launches cutlass_tile_iterator_load_store_global kernel +cudaError_t test_cutlass_tile_iterator() { + cudaError_t result = cudaSuccess; + + // Creating a M-by-K (128-by-8) tile for this example. + static int const M = 128; + static int const K = 8; + // The kernel is launched with 128 threads per thread block. 
+ static int const kThreadsPerThreadBlock = 128; + // Define the tile type + typedef cutlass::Shape<1, 8, 128> Tile; + + // CUTLASS provides a standard TileTraits type, which chooses the 'best' shape to enable warp + // raking along the contiguous dimension if possible. + typedef cutlass::TileTraitsStandard Traits; + + // M-by-K input matrix of float + cutlass::HostMatrix input(cutlass::MatrixCoord(M, K)); + + // M-by-K output matrix of float + cutlass::HostMatrix output(cutlass::MatrixCoord(M, K)); + + // + // Initialize input matrix with linear combination. + // + + cutlass::Distribution dist; + + // Linear distribution in column-major format. + dist.set_linear(1, 1, M); + + // Arbitrary RNG seed value. Hard-coded for deterministic results. + int seed = 2080; + + cutlass::reference::device::TensorInitialize( + input.device_view(), // concept: TensorView + seed, + dist); + + // Initialize output matrix to all zeroes. + output.fill(0); + + // Launch kernel to load and store tiles from/to global memory. + cutlass_tile_iterator_load_store_global<<< + dim3(1, 1, 1), + dim3(kThreadsPerThreadBlock, 1) + >>>(input.device_data(), output.device_data(), M, K); + + result = cudaDeviceSynchronize(); + + if (result != cudaSuccess) { + return result; + } + + // Copy results to host + output.sync_host(); + + // Verify results + for(int i = 0; i < M; ++i) { + for(int j = 0; j < K; ++j) { + if(output.at(cutlass::make_Coord(i, j)) != float(M*j+i+1)){ + std::cout << "FAILED: (" << i << ", " << j + << ") -- expected: " << (M*j+i+1) + << ", actual: " << output.at(cutlass::make_Coord(i, j)) + << std::endl; + result = cudaErrorUnknown; + break; + } + } + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Entry point to tile_iterator example. 
+// +// usage: +// +// 04_tile_iterator +// +int main(int argc, const char *arg[]) { + + // Properties of CUDA device + cudaDeviceProp device_properties; + + // Assumne the device id is 0. + int device_id = 0; + + cudaError_t result = cudaGetDeviceProperties(&device_properties, device_id); + if (result != cudaSuccess) { + std::cerr << "Failed to get device properties: " + << cudaGetErrorString(result) << std::endl; + return -1; + } + + + // + // Run the CUTLASS tile iterator test. + // + + result = test_cutlass_tile_iterator(); + + if (result == cudaSuccess) { + std::cout << "Passed." << std::endl; + } + + // Exit. + return result == cudaSuccess ? 0 : -1; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + diff --git a/examples/05_wmma_gemm/CMakeLists.txt b/examples/05_wmma_gemm/CMakeLists.txt new file mode 100644 index 000000000..ab048532c --- /dev/null +++ b/examples/05_wmma_gemm/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +set(EXAMPLES_BASIC_CUTLASS_GEMM_SOURCES + wmma_gemm.cu +) + +if (NOT CUTLASS_NATIVE_CUDA) + # cuda_add_executable does not take interface include directories into account + # Let's fetch them and pass them to CUDA. + get_target_property(CUTLASS_INCLUDES CUTLASS INTERFACE_INCLUDE_DIRECTORIES) + include_directories("${CUTLASS_INCLUDES}") +endif() + +cutlass_add_executable( + 05_wmma_gemm + ${EXAMPLES_BASIC_CUTLASS_GEMM_SOURCES} +) diff --git a/examples/05_wmma_gemm/wmma_gemm.cu b/examples/05_wmma_gemm/wmma_gemm.cu new file mode 100644 index 000000000..2b1e3567f --- /dev/null +++ b/examples/05_wmma_gemm/wmma_gemm.cu @@ -0,0 +1,353 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/* + This example demonstrates how to call a CUTLASS GEMM kernel using Turing integer WMMA. + + The CUTLASS integer WMMA Gemm template is instantiated in the function Cutlass_S8_WmmagemmNN. This + is kernel computes the general matrix product (GEMM) using integer arithmetic accelerated by Turing + WMMA and assumes all matrices have column-major layout. + + The threadblock tile size is chosen as 128x128x8 which offers good performance for large matrices. + See the CUTLASS Parallel for All blog post for more exposition on the tunable parameters available + in CUTLASS. 
+ + https://devblogs.nvidia.com/cutlass-linear-algebra-cuda/ + + This example uses CUTLASS utilities to ease the matrix operations. +*/ + +// Standard Library includes +#include +#include +#include + +// CUTLASS includes needed for WMMA GEMM kernel +#include "cutlass/wmma_matrix.h" + +// This example works only when this MACRO is defined in "cutlass/wmma_matrix.h" +#ifdef CUTLASS_USE_SUBBYTE_WMMA + +// Defines cutlass::gemm::Gemm, the generic Gemm computation template class. +#include "cutlass/gemm/gemm.h" + +// Defines cutlass::gemm::WmmaGemmTraits, the structural components for WMMA GEMM +#include "cutlass/gemm/wmma_gemm_traits.h" + +// +// CUTLASS utility includes +// + +// Defines operator<<() to write TensorView objects to std::ostream +#include "tools/util/tensor_view_io.h" + +// Defines cutlass::HostMatrix<> +#include "tools/util/host_matrix.h" + +// Defines cutlass::reference::device::TensorInitialize() +#include "tools/util/reference/device/tensor_elementwise.h" + +// Defines cutlass::reference::host::TensorEquals() +#include "tools/util/reference/host/tensor_elementwise.h" + +// Defines cutlass::reference::host::Gemm() +#include "tools/util/reference/host/gemm.h" + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// This function defines a CUTLASS GEMM kernel instantiation, constructs its parameters object, +// and launches it on the CUDA device. +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Define a CUTLASS GEMM template and launch a GEMM kernel. +cudaError_t Cutlass_S8_WmmagemmNN( + int M, + int N, + int K, + int alpha, + signed char const *A, + int lda, + signed char const *B, + int ldb, + int beta, + int *C, + int ldc) { + + // Define type definition for 8-bit signed int WMMA CUTLASS GEMM with column-major + // input matrices and 128x128x128 threadblock tile size. + // + // Note, A and B are 8-bit signed int. 
C and D are 32-bit int. . + // + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kColumnMajor, // layout of A matrix + cutlass::MatrixLayout::kColumnMajor, // layout of B matrix + cutlass::Shape<128, 128, 128>, // threadblock tile size + signed char, // A type + signed char, // B type + int, // D type + cutlass::gemm::LinearScaling, // functor to do the math in the epilogue + int, // accumulator type + cutlass::Shape<128, 32, 32>, // warp tile size + cutlass::Shape<16, 16, 16>, // WMMA instruction tile size + 16, // scalars every time a thread loads from A + 16 // scalars every time a thread loads from B + > + GemmTraits; + + // Define a CUTLASS GEMM type from a GemmTraits<> instantiation. + typedef cutlass::gemm::Gemm Gemm; + + // Construct and initialize CUTLASS GEMM parameters object. + typename Gemm::Params params; + + int result = params.initialize( + M, // GEMM M dimension + N, // GEMM N dimension + K, // GEMM K dimension + alpha, // scalar alpha + A, // matrix A operand + lda, + B, // matrix B operand + ldb, + beta, // scalar beta + C, // source matrix C + ldc, + C, // destination matrix C (may be different memory than source C matrix) + ldc + ); + + if (result) { + std::cerr << "Failed to initialize CUTLASS Gemm::Params object." << std::endl; + return cudaErrorInvalidValue; + } + + // Launch the CUTLASS GEMM kernel. + Gemm::launch(params); + + // Return any errors associated with the launch or cudaSuccess if no error. + return cudaGetLastError(); +} + + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Allocate several matrices in GPU device memory and call an integer +/// CUTLASS WMMA GEMM kernel. +cudaError_t TestCutlassGemm(int M, int N, int K, int alpha, int beta) { + cudaError_t result; + + // + // Construct cutlass::HostMatrix<> using the integer host-side types. 
+ + // M-by-K matrix of signed char + cutlass::HostMatrix A(cutlass::MatrixCoord(M, K)); + + // K-by-N matrix of signed char + cutlass::HostMatrix B(cutlass::MatrixCoord(K, N)); + + // M-by-N matrix of int + cutlass::HostMatrix C_cutlass(cutlass::MatrixCoord(M, N)); + + // M-by-N matrix of int + cutlass::HostMatrix C_reference(cutlass::MatrixCoord(M, N)); + + // + // Initialize matrices with small, random integers. + // + + cutlass::Distribution dist; + + // Uniform random distribution from -4 .. 4. Values are truncated to integers. + dist.set_uniform(-4, 4); + + // Arbitrary RNG seed value. Hard-coded for deterministic results. + int seed = 2080; + + cutlass::reference::device::TensorInitialize( + A.device_view(), // concept: TensorView + seed, + dist); + + cutlass::reference::device::TensorInitialize( + B.device_view(), // concept: TensorView + seed * 2, + dist); + + cutlass::reference::device::TensorInitialize( + C_cutlass.device_view(), // concept: TensorView + seed * 3, + dist); + + // Copy C_cutlass into C_reference so the GEMM is correct when beta != 0. + cutlass::reference::device::TensorFill(C_reference.device_view(), C_cutlass.device_view()); + + // Copy the device-side view into host memory + C_reference.sync_host(); + + // + // Launch the CUTLASS GEMM kernel + // + + result = Cutlass_S8_WmmagemmNN( + M, + N, + K, + alpha, + A.device_data(), + A.leading_dim(), + B.device_data(), + B.leading_dim(), + beta, + C_cutlass.device_data(), + C_cutlass.leading_dim() + ); + + if (result != cudaSuccess) { + return result; + } + + // + // Verify the result using a host-side reference + // + + // A and B were initialized using device-side procedures. + A.sync_host(); + B.sync_host(); + + // Copy CUTLASS's GEMM results into host memory. + C_cutlass.sync_host(); + + // Compute the reference result using the host-side GEMM reference implementation. 
+ cutlass::reference::host::Gemm( + cutlass::gemm::GemmCoord(K, N, M), // problem size (type: cutlass::gemm::GemmCoord) + alpha, // alpha (type: int) + A.host_ref(), // A (concept: TensorRef) + B.host_ref(), // B (concept: TensorRef) + beta, // beta (int) + C_reference.host_ref(), // C (concept: TensorRef) + int(0) // Accumulator initial value passed as argument to deduce + ); // internal accumulation data type as int. + + // Compare reference to computed results. + if (!cutlass::reference::host::TensorEquals(C_reference.host_view(), C_cutlass.host_view())) { + + std::cerr << "Error - CUTLASS WMMA GEMM kernel differs from reference." << std::endl; + + // + // On error, print C_cutlass and C_reference to std::cerr. + // + + // Result of CUTLASS WMMA GEMM kernel + std::cerr << "CUTLASS:\n" << C_cutlass << std::endl; + + // Result of reference computation + std::cerr << "Reference:\n" << C_reference << std::endl; + + // Return error code. + return cudaErrorUnknown; + } + + // Passed error check + return cudaSuccess; +} +#endif // defined CUTLASS_USE_SUBBYTE_WMMA + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Entry point to wmma_gemm example. +// +// usage: +// +// 05_wmma_gemm +// +int main(int argc, const char *arg[]) { + +#ifdef CUTLASS_USE_SUBBYTE_WMMA + // Properties of CUDA device + cudaDeviceProp device_properties; + + // Assumne the device id is 0. + int device_id = 0; + + cudaError_t result = cudaGetDeviceProperties(&device_properties, device_id); + if (result != cudaSuccess) { + std::cerr << "Failed to get device properties: " + << cudaGetErrorString(result) << std::endl; + return -1; + } + + if ((device_properties.major * 10 + device_properties.minor) < 75) { + std::cerr << "This example needs to run on a Turing device." << std::endl; + return -1; + } + + // + // Parse the command line to obtain GEMM dimensions and scalar values. + // + + // GEMM problem dimensions. 
+ int problem[3] = { 128, 128, 128 }; + + for (int i = 1; i < argc && i < 4; ++i) { + std::stringstream ss(arg[i]); + ss >> problem[i - 1]; + } + + // Scalars used for linear scaling the result of the matrix product. + int scalars[2] = { 1, 0 }; + + for (int i = 4; i < argc && i < 6; ++i) { + std::stringstream ss(arg[i]); + ss >> scalars[i - 4]; + } + + // + // Run the CUTLASS GEMM test. + // + + result = TestCutlassGemm( + problem[0], // GEMM M dimension + problem[1], // GEMM N dimension + problem[2], // GEMM K dimension + scalars[0], // alpha + scalars[1] // beta + ); + + if (result == cudaSuccess) { + std::cout << "Passed." << std::endl; + } + + // Exit. + return result == cudaSuccess ? 0 : -1; + +#else + std::cerr << "CUTLASS WMMA GEMM targeting Turing Tensor Cores features requires CUDA 10." << std::endl; + return -1; +#endif // defined CUTLASS_USE_SUBBYTE_WMMA +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 000000000..23e75d409 --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +add_subdirectory(00_basic_gemm) +add_subdirectory(01_tensor_view) +add_subdirectory(02_cutlass_utilities) +add_subdirectory(03_strided_batched_gemm) +add_subdirectory(04_tile_iterator) +add_subdirectory(05_wmma_gemm) diff --git a/media/images/cutlass-threadblock-gemm.png b/media/images/cutlass-threadblock-gemm.png new file mode 100644 index 0000000000000000000000000000000000000000..80b86c766019cf4ab46824e97afcbb18cd2ef6ec GIT binary patch literal 60809 zcmeFaXH=Bi)-H$vQ4kanQL>=00R;ugNkv2fK}kg@Kt*yaa>j%R2&jli7D_+dsbU9^K!)cbqY{gW;y?4Qs8Lp68iU-$zw9->bmI~UNoEkm<{eV_`vIG4 zT6QEPCxwwey96|2TuDe~n6F>Cq~@$Q-Am?t)NrV3;jdUHDy8iUqG}=`Y>v0o&S++K zs2;tQ$<7@apn7sS@V?64_ygByuK2ND3+A{+7tj8EeBY7F+&e7dd~V`gpI<(%Q(W_m zUR^l3R4P!^d8%m1K{(G@{^U}Fbetkobo9T^0Rth1ugRP-ku@k)kv~- zVUUL<>Aa6aUhKZbMf5KCc~OY~^epN^Qh*|J;vPFVHxLefRDikucSfu9J^X*O`*}9<23ZABOZE z{m+||4$E^WzB%VKi&j@n)kyEkw=Q#AwaTsz#O>w}fEx~6v9PePx3?FWd$(^*AqCfx z@7jUYP3O$`^6Z4cu}*4~Na$$}4rWo?E7Dw}!va@e1sfb{YHB_|p`2<9V}s%T0^k@@UKsi~dR*J?CWUEaM;)rhL4r;QHm1FHEZ{S4Pp&`h!KQKN4?vnb>Uw1^DqJ9aBIGN!=tFK zQmbCEs4ISnun9&Sn#}oIkAZCJ9Sm(UUxk91(aAC#*A%0y~&Wf3XcD&{HAjS06>hbcTPu8Q1Q zH0~TO3iqktdjDpz4l4t z?41L54GZFaaUw&WnU=Nv? 
z(`*_}8k*|Z*HIknlA{Sx*b#){3bucBc zpLVqvkCm_^3OlE<_XT7TuQQ@OSQSziC62n3kd}2A^)U=tA`;tl0l(BYZ{9FVx?Yi$ zrAA#G{b|;(hs;K*UoeT;oAj4%y*Y1ChktffE9=UQgi^EK!rT>lA&WO*B!~AQZjQA> z1$`roKf!6P&x5!eCgoAs8J>y5;ouOhkgpB-^Ksk=zsEuH%e&OSeED){&+?({np-v{ zr#Lvm(@8%4-|>q7FP-ZDv#+xz&2;6ON5^?n(0!?_3~Lu1njicuy|vt`Mna+#cHZDi z`6IIW;VaG%1uiN@NPBsL#}SKZffaC^>2jF<)=d1M5XuYZxU0;oOv_`1Yh<=)uC%(k z+MR?X@};nK-#$wE$mX4CB&vvl{ad!ep*(WVVX}>{bnV@Z2qE^X&prrg?_{6j;P{dz zwIO8Phc0$rc>46Igv;W1nazvRy<<^M(d#6lQ=3bTV{#<>vpgB)LwQT?eBQTYLXB+G zKo#ui`GiwS5MVzd0W1< z+UItN-A68r-*8#=W|5<%$Jsg*sR-x9t%1=9YhIQ3r=D!v+pA?e+Z%JE&1^_;`e@ki zxsg6(24@s*WLz1dG_^Cgvpx6m!5&{58FzR0rO|o^ZX~=A-<>*LdFnKi;Yi!lcFOH< z20_tY+w1)vtGX&GFV|B@#!{lhowRp~e83B>JxdyztbW06#AB-g=D9IfmVqV&A^k>&oRXxGaaL6^mmk28lNTOr=926=5hLcSaXU(P7o=GS?VeH66=&r6G% z>5?TCVid=)y?&v}t|Yt`tSp7a5#crVXw*J08HR|wCZTltkE+J$cGM@76$9$iJSkHiL0IGua zG0xWEFzP-fM8(e|%ea7F3br3=xi@ z{da-I&xfQHA=W`m`TF{7ccFv%-MjBPX_(*g^}{jg;lH>8jxI3F=FlI{y~%{);^Mk> z>lTq(>a52jT5h##NncUA4(i~qk$^k58s@2~gY4&?Dz>hapydLXbQpqd~b!s5AB&PaN4Xn5EfH~6F& z0^--ujN9c8X*d+m(HK7%n(WZ`64ZZr5uIz@-}mp=O5NI6&k z6PI5qPeyS|q3_~Ijez5{rU2&HU)a~Yx@Nks=j$M~g1CZgu}{!&t(aiYvu6h*!(t=^ z=h{;{raQBZqxmhnE_kRwq?zq4TGg>GQBT!4Yn~w6Z`&xj8Yy~A=@weogV2t@5s#x6 zwu%fB5x3`b-FD9-OPKG1f1Jg647#$S0*$GpeLya90(Dk9=N!$6R^e_q02VWBA>8U>-O_8< zuV44|^({m%Elg-m>ALThLk)zRV>7K-D%&f3Y3eIPf0)#-4TA@XT06$2ata>AYj6_W zngGoSRnfK#{r)ak72!VTxUlN9`Uqh@-NI$qt9Gx|*D18q?ivnM-=f+Io zEBIoOX=@^R$Z%Cq1ntQ*eP;6xY;UWww8vC>zS&mEYF~GnmVk#W9E5r{ISdA)lV`C$ z=)>Ylm`sRCyc$3+xxF@gNNR0>Mpb$2T69jB478MyD*TvCEHhFJPR$RQ-BuM5+e zWtgm&;4`@cPf)NV4vILdh`N7*hs^*_)hJ<(o-d!?5)FNU-hqTCLC{@}Zg# zL{2Nb)-3Xu@)xxmyiZ+E63zky0BZK zT*v%t9joR5qm`W2u|?ukhE9yaI3`W_o-$94f&rqa*uSz8!63h7=u77& zT$VkZHgz*iGt+B(DW;t7wFW*#{c9omEMm23Gp`||LEF^}g6*}ZS+Vu%`Yb7i2f8fP z3+q(1dBHZ7KteA-_an;ua&l{e*kwx^cPDa>B{Y~rlE+Iq%6Yh$cF|LAbMP@nysGw0 z(au8$rc4TVN$+efky0FcZ%knEnZ%avaSHJw?u1Iz8vuTz{#4DG%Zs-@2Ck zSqLSjOikp)M^4LYvS=SwBKL(Gh6nA*SxiaAhC_M)F}OZam7Mhb`MRoI+B+Zj9Ex|1 
z)qReAL|%V}vT3%Eo(^qu37lVT2oEYnEm;k9F=hjNjp2&4Y?aGILTH^|YqNUcVl7X7 zbkdMKLpm2}JA5?8W%PtOM!V9FmO_dt2}c~0-Z8c85bUVGYk8=p9LhNBvxk{>qTObF z{aaTp#d4g#eYiv1n_ek40HL`a{q0{*a95^*PlLyvq$W=DJL65UMr@MY!I1jvId&T2 zT|KhBmuBgbXz5Y0en+|9)rwmMrrW_-qvcPZoJ^-c4P5+Nuw z_4kxHU9jxRQI=Yd2XAa7Hs4hY9yLNCx7Fb$muW!ZLUK~{WM}r+vl9Y&R=tpBGchnQ z(9kT6#dtM@3r>NLIw*Pj!ERE|?R8_jrzD0oEeQ&8fsCSeE1nMY8{#QbZ5f~dF%dtz zu~5ZQAH9M*lAxPXHjU0PY0lbJNL7eAii&MSi&lk<+qo0kQR#y3%n1`oG2sP=3+ep| z2a=Kv=SnxQVd6A@I!r_K#0BljBO=~XTwdFoF1;dWoKi%jDhK~!p~P5|-( zez}?~HB5s22_)-Rkm)E;j*&7l{YZ;KL+X1_AnFNKU;K3%^8_h>F^5SL|FEw|XPAq3 zi`@>O7i_T%QrwJ6&A}vi>BCYH1I1<>s06U~)aLj!F1N}0Yw_|S+~A!BS|2nvxbACI zN!J!^&kYkt>M10FdfMQJSRUW7e80dUiTR5IXNU*K?b1*gdO^&R_A3@tuLKL}%W*B` z4%S8-{PR;8W&8R|+4%WwA*+QWTkCr(?y8o*Y|;kQzyfw*@AhDI@_F@3-SN9^w02owD zh6`FQOWvjm)k+>p0Sh%^+XF{7x1lSYAiX*ND6IM;`MqQRy)_MoRCyfE^OI#)ZN-?%K+Rc?6DZGHXfD|G7W=y zQ${kW0G>|BV8@qU$MPL5-k2>CSmiht`=@U!!?g(40TRF>5+&s#96r&@7}$OEncksII&!VGwUlZ2oqt7dUc3FsIKeQGM5+J83G|c zHLAQUmlo~=mEVBAW$j;B`#OjvkGI>?-rlYB({MKD9OO={R$?eB*)GE~ns`PvIM ze45v={Ib^Sbf>@t4#GrTgm9yx@t?H?tT8c@Qp!9wRnl|QSpXwuMiNy-)>xf82M}Q$ zIuXZ=B0g*L5qp(;b=fX{adWvi0nN%`}5$`@M^tXj}&k6lo% z!e6=%AasTZ=Rm1l^s^$H*Fv8@b*e2D#b?9ov3Rc#Dq|zIgG42YQko~@d&T^v3A0~9 z0`WPfs^%KPvRgy~ToHDOT`uqf&55ryxU5;;G(urTDz*?3DZQ@du6H`>`Mz4=TECr^ zL*5m1!39_q2o~g}jX3NQ^59pF3t4ulrKoY4>wA=yEPP=wV$1FcNCiOBhsE<-czbgn zWIO^dGS3Z9A|4+<{z2bsD>tlKGq5s*a4!lQlB*Z<5N|#VmAZlD3qTJ5b%N*M&$4?TWJN94H(#*J)kVe`R0ZamG=G{t=G-+I7bL}Rr!`=Oo-md*tVanpBC;;(p}#vd7@O zgUiS^O0XIHq?)88Xy<5lNcJ*B*@tBjrvRO#>drD$$o`A%9ax>X)9eMi6V(rJ^4G5I zms)sFQMlZq0H$Uzx;xGOpdQ)Qu>KA)w3KK>RQxj}C}SKTS^5zY7Tp)8*K<4cHUOLt zeXI#t2tcvM4UwXEK0StaD}^oSK-=9I*OzJ0%kQ!M3hug&I{Qm6PDLqAD?1U275r{4 z$R6hUN&ue3Qe+*wA}2Ss&t36zBh=^wUL?@-js`vMEj_u_-V1`da ziUFQ8JnAX&IyH({u8gl7@O*O%eci%h8e+xmv)Lw@fi6(T^*Ij*KuU`bK$o~WWE0O+ zg-}Tu)*ki1qze}IGQKngw6;87?5Kwupb_k9t+?&At z=G+~~wuEY~pmkj*5`F{*D45K8FZnKV04xs$#3O6-!HfA9OI!(Gf>K zI}v8Cy8XolR6jhZQ`1s=!+?@@yyt;tW1;;!DVE0#uJCXPdrDEmpH1 
zIB>v-mr^&l(w}ZBFAML_Zwl8i68l{tAVAOj4fMpuNWoY%MdUdjS>s8F3@Lgg#IJm1 z5{9*}kh*q{A&YK86|-Aip-At2$Rq`6PBak#%H7wY9Dboj@!oBS^YRN|4`uKsZ^ z0El|zcGhdak!x-+jTm^Q4WsbxD-n1V z3y6Y>_XeP*22P36U1_TfQV(#^5xy7fM}T+|w)pk|)_3xJh-R`%0;CNIXS*;^M&tRJ zY}V!n2N|1=0KCrA1W^tudtKU_Wu9AK0vNymq|`|vM?l5Aph|7D0-$3sDNW-^9|t*u z0QKAA>DDCW0b;boB9e1Ij*E{!rAs#TW)UjR=#@k&1poexs7Y@%&?+gMPj2YUHSYk- z#o+GUI{X<0MMa5Y&kI)jm+1Zo;Qwntb^lEW;eT%X&!PE04gLMk-u`EA|Igqt{&SN5 zcRk71!f|5EBE!Zbbx%=fNkSW-Znu9^Y!5N*nFemz2Sld;p97wn;#aDFw=$%Z9Pq;t z;r?gG#q9U9b5;$MKjJHz<_8b`I}I<#aciV$0;aZnXhfp2J^p%dUFcpBA4F4u@OAKO z-9r0P_jNY*rIC7un(E9KoQ>-m=!H63)?fA8wR2eBvdt{u81LZI9Of@V+>K6-kp}c%g zPM{!_DuQwg66-XAO6+BCpvBKRw3Sj8B8>%?;;C$E%c7v-vQ~~zG zKI?VrmoI;U^5AL^vj?aN%=x%9KMVk>n&-9SnZ18r^pBJ!@)Fux{`&P3qPF*dvB;fs z0bcksr7)6~)$Q`U`cEPhsQ4x1eFi|OV=4*qS{@F7+*Vaoyb=AF{|8j^VjXvcpboN; zoFa$EAdv|_@UOp4z`cYtPd!;hR_-}~HA^G3n;`4?J7?E|`h305edz|%B{p{Uv4$vf z4iYa!cJPZvmD3pJK7HB{3MR3K4$py5i#YHx=Ev>1u%Lh_w*4-@gb8HF7T*Wn^qvg5 zK3Y0X&wy4&c>EwcM$C6oAaL0khRgsqm-RcEIW!3gyuhyySDzZHr2sNePN?{d%|uHA zg7pkdCWPwo)7+Q-A<{YW&jp}pT3`I0Jv3P_+<)PJ{=K~TTpM!V-ZVMFgLvi<$uK-*uJ`#`k_G$QN(vO*;MzPQER zz%{>U$F+pcL1ooKeagD87?{>`|MRUUV||!gsN&`uqXkhMfnG1ujB zK>I*q;674Koqb?T;$NOd5DP196y+-qiU>k%*nS-}x(DoF`7;htb#X~9EG%f}0iXie zHM4|slLh7Hh2biIBqc!`qLppryeVX%LSAvJ%a1idy_((WXU7Z{aFK%=rFRi1 z@hp0j7){B*Lkj!HOHkY9bz3RO%k$F=-Wq(yB%Y_4c^6nQ3jotP!@u;ExUslRXMzTc zj!R_(@RQyGyQExQx+F9D^LH!Sl2vJFXwm~>?Kfrpj{!yfmvXcO9TYvyadP!QWoml? zX*(Vs%B5;l706`7)8O(i+hT?WKrS7CRI!IclcgK80y>`)Gd#iS(|P8hrjJN>j1aai zok)yG^9OG3(l`y;kBw)h&A})*YA6MuReUKR*7rN<-`-Bu%zT$ zhRs041~1b_t;QE5MSR~$pI1>)F@-HsDx&-AChLrET8jx-F(AMBfjhwE`IJuI%+5Bd z&q|cqUNgvUS2BNch^6ntozK9Kc=q5hM&`FS0K-rBK&Z29+JCE1G#xo^_2Ggt`q`Y5G2qEE-Paex>w`I!fy2+OtRo%&4Ev;s@|FZYWAj9hl?)ma@!&?+I<&fvYZG0DcL^cFVk5VHgh3QsUnW-0e? 
z7P45xZT)$>Qg^^h`~z7N6hiU7$zl5+k*9!644(k-OsfT@#T@WLz+ea_*hcUh`h|&{ zmXg{AC7ddgR>rk9FJQ^S5W3h)-X;{_sA5nkbuEI)YU-Q;GO2krVkI3ogThPiwGnxt zWqz*)h$(lIk{>;zd6ToJumYgwFxu*be=|ar{czxwcdU_ zzXMa09hhjH4ivq*Ir-7QK7cDW*UwzM-c8V2o$K$nplnhDF=yIme6p&XW#S|-8oi*Y zOY{G5L}9Fl_^O_&+TmYTh!T^)iqJ;{ZYjYUI2aj#eoTBdOB8rp;<^4k6qe0|Zc4Hz zH^m&|L26r(ll2t2``VySW@e@l+eJ67?hC2ih%th0i0NM(nGD)D>mKkEX2%VE12q7Q z094%+A@s#0yApul!pnk;1q0g#AYFjenVWAvc3YP*(a{;>4F>>Z2B}L3ULVNNkxIN?`jhm=|fCfODh=S$usck@Y%?xT*E( z{MNko$>j8M1;Jg`8X09{ATZukkXl zf*XWU8dL8ls^i@UpR|Mn)1+yB)cS`%r~ig+Z0pL0)Sx^7FspW1o=8OR2AGvus2Wi1 z!ZrgRH`kZUZ(Q}tK3+ibZ|@Jm{1z-6Q_^K`a(K!ci0?n4lWV3v_{lrh|1T7x5d(&3 zf`#}2d@~TkEpc-vraEF?$so~BXC^06uf$b5F2-tV>+G4y_YljpH{d`8l?TMQFZv2S zk;zvt^m3voK*Z7itos<{S*eX_0VX6$W*avO=`fjw0+ek=2X?wS|M?Kjkt54d^8kUG z`5&2rkRiS3v}>#ep)a#ssFHn;C*d>JH(GX`E$*f{-4F{|q!gRd_<#cztj<9(@=yCBDl9{C;HS(O-{$V2~jgVJuv5Mj%jO-|KbFc8bUfUjAHiVwLBR)$KY*0 zSxCzEpF=nc>=I!2^%_M$0jWW@YYtmkhyc3j4?Gang(aU}@ft$m;y?1;Mf6Pg^N8Fc zFVPsn8&%oCCuoG*`rE>0E83G{Jk4}XGW1I7@Q1bEnq~DVd;kiI`#gY|)^FrkJleWE zK%HS^zJF#LIPGH)do1z5v1bb+&?>_Jt)#1`zJ6E>uiXNyf@LU>J@FeLRK;oK_kKZUF%P z@&2zZ>NEXiWw|BR{iU#eXY=}u*xCKCt=pJ;YfS%QYV@dgiQH=Mp9g}_RxP`x8PZs0 z_xW-FyWN+Y<&0GKX<*UoaQMv=wu?Z|n=SbKI(UXskh+3-p@CtCfT|0+DCh_bZX#cl@Cw0W26JXwJ<@%f4?TneOR26^y4Ip_J?*%N23tyVfiDcPDZWg#LYgtw^WfTITT z`Bbr+Rbvf z^dpyp&u=~Nv-wQvmkNCP=ieXKc6Z+da*2$R{(|%Tz_Y#+XI|lz!}w*V3_|pQ@%Dx7Ii_qS*r_b^Bf!`9$n^; zy_%gBa5{GQR|&w04HvRZc?7iB+1R-WvNdYhx%mr~NG+9x8>Ih&Icv68+ zXou=J)1WH4J?qHtimf8V)0SY6`Unj%I(Dw%*Clf1hbQ4s2KxDB)tZ6mrmI;oQt;Qy zPy40=Z%80ZLg|8V0nx;JvrtjbxRXWBXn6jR-xfWY71 zP*+oXMfHh6o0Sf!0ir?E4}Kv(QB4DKK`Br-hE;R@v*8}3>1B2fb0w?XIe)(eGImVvhols6w;y0bQbIr~P((oP&)Eaa$S8{8m-&PRrNSuz z4+Pqr(_JN?Sb=;*kmi025(SkhzJ#clZLTf!yDtT7FKAtaceVXm*?*t&*LHDWL0!fV zvM;C^Ga(D@$_F0}PbzG8zw!44)K!t*pEB8CS2&o#wGb`g^pZvVT23T!Sdz>)36n zZP9#SoQ%c&gWZ^t0Obfd0#vnL9+qc<1E|j=>8jQ}))>R$&Xx+1c#_fFLkZ3(>v0rR zj*#!UDM!AcKdyRQl;3mm+X89tnqMlH>T+!Gt5-+2K0*SY1iD)g-S@(7*YADqkKky? 
z(g1Nt=R?KBPs6E%(6PiV9(4^!^KG{&JAHZh_J3&yzoMDkLMq&A@6ub?enM2hsw8(* zdV9f30}YO%+Wk8J>*vpJZjUhwmxB^bqoMJBVIYM z`@_$8aN`j0-DV5M{zmMSDFRXlSg5I5ra|YLUfSXX70huO8iUav_VEzo&@4;W3(rn? zkqtC|AZQS|v3*`U9!sZpHQf1eTQC$5RU_e;5Q2AyAO;A9(JW$;A3-e9lW)mD`|Y{& z(3RcnNmuX6*DjDTn;dRn&?XuCA>}bm0y$Bc*ADm$=vp$A^b*7jU5T=ZD98o?=HYEb zb6!m0T92$8Te$X&j6m-=Tr#E%5{xe!)G z<%m(k=3qJ5oPLR`>3}skS?c{42`^qA=})Xrnk(C}vY0NvsA0*5 z8YVcnY!NuZ54nVUzX*AO!wjbY7SeC?Id@?}&X~CH<-wV~U%~>{a5%8CV*UK3S6nnok=I0Xqkgt_{J`5D9`qdJ#?*%!8h+Kg`R$*jM!9dC-h8W1f>xW`&%M|$t?y1beddskKErBU75;G7pvsbC z5Zd<6y@a#`ImZ*`YVZ(r`W6=)CM#gq_TsO}v*y1#Tzi&M@X-^qa(cWE{=+d7lgz7k zZ)+we#txfAYK&J#<;~iY=F}CwYGv*z++E0_opwi8awx9DS3fg(;-^=;H0Bw5eOC=NYh4*vnsCGO` z9wOBoWI4uAJC_nCzjkIPLEoc$RiO0BY_H>Z6P8_Wn}H^qPsp+C%L{h5xVW8E%y?6S z$3`BUJFTyIR=W9jKKp(D>J|llt(6T8QJMNDuFUl>fYaFA z-dLPy<(zDF`5r0B+-c@)vRc97Pe9|ck*FE)+j zT40d$W~;I)l<(^yD7{;t&kqiBkOxhU8hU3MeDbOw6!ZqIZijF zX{kmCx@9XxP5IO1>((gVWSS5-tD2|?um)ej$XyRJ$gytT)3d8r@6dB2K!xZAk!Mn% zWaOp4eaO*UM)~(IKE#9eh;Z^8@*320ntMnf`qrd&tkJcX) zW1FzYXLQBp#$u9o=0C3uekOAqKirO6uuCZdCo$E1H8(NNvh!VQ24oAv24CFcxOsLM^Y+U1XQoQz5>DrnCyXd;fZGXWF=35#s3I5md8JA7>dJS^7&xwB#M9R4D>hipZ?c_Kp2Tfkv5{@B^R<}zB{8{x$w9>n;q4X3@lxBMT8CyJF4?#x%m zqCRWC^>DCESDqO$4t;Z+iN#Aq$k9u&u7*x>pWa_MX(_$_{Bq@+MJ7&6ay#ul$yWLH zRWF_XdJGkzrKBjhw_)i;|JqoD~n!t`X&DP z8=3jVH2+=1|9QIq=n(u(8U1b%{1Ku4=@I;!xcT!b5EAHD)c%|H`ER`a06HLbug4?= zsDiEaBY(u_0TnbCcbiZ-Mxy@Vz<#W~W^i*8?=ji7oxG%YMgVWhahQI^XbE01V%Y=# zwSJ3+-Ms@S&4X$bqm+kb1UlOmJfdIe*sYjvb zmi3BY?Gc5$)nA&ZcdA%;;@(M_wx@g_DBpVdGBA?ZHZ(N6lY*Kv@)%Zsf75ekn<=a| zZ<_vh_axH03{4UGxn?2jB2_>uK_46pOt|2>e9oiy>{v{OzZETRJcmcsb!D1S({8)^ zg}zUkD3%!jyZnCV;Sgw>-KRIaT2_)4s{nv~)Jl8O%1Ce!DPA*1dvmroVmTmb2D%x1 zr0QNo*O~m5iKPPcln1)fR8)o#%a{mMW42P*m%+i|LLN8toj|~+R}vBY@k&JwEcnY@j}jQ|Ezg3E4h zULnUF(8MK@f;U%Gd`5*w+1l2EQ%#zIrr9t_H|@d)uU;O3uE?jJTdtJHo7}1b3FRHz zP`2fmF8SupjnoXcP-I8`_UwqJZp0Z~Y290hRSat>tN`HZgItgjao0~KGBH~T>`U0BWIoquiLu6T!P6&CCK}b#6K`hb7&pFd z8hi0Fn5NHj!{G+)UpH^pycB(4{Cp_idS?q7EsG(hI(Q4JXWjFKG^8y}tCt>yzNLCH 
zK13b|2U8=%Gt9`i?hWI&j>6@~2?}H1Yl7v9ge{4#;ZF35PlJN+f%!cu`OqIARtmEC zO@~SJPOp=J%6EOogMU7t2pr8IAPRLK@DWMH9WAb1zwQFZpAb8F_tWEBC(CxxhCZK1 zMTrTUOtS2}cV8Ztvb;_ZdSnXQW;WJv!|a@HEp%sM&jJ(B`{9o4e15?zM-QTXhodff zz;a(D`n-2*o+Sgb^FZmx7i@mV&ZY>drf>D{gyiPEUjQLPcyA-LOaI*p{W2Tl;ND+q z37S<2yA+*#CHdO(gq9#_C7@0Q=u)B3+ia8;esxCcayN^|a_mRG8xwv^poQoMD$liD zJ=fteuo-{=hBn9f1Tl)9I#;^AP&F4E9F(F7e6?n}wt9N9ZV@DqWV?%|-hlZHGYESs zbptDtW!64)+kS#2LU>27$nnOF5U!vFxaWk8I=byqT zmNV7XoMrgi>Y#rSIWMe{_o5*{n@uUH%gf_C4)W8iM9x@|}`8zB`qEvVh3r6E2vwo%_&Bj8`h% zx}C1gNg*GaFud1MxtJv@+ zc5?rU;-|@Wg`8U`D5b_)XBcP1?^du*sSuYZnAhUdlGi91G!R^R185V~2kv6)HIv%8 z7VD*W5RN-HFzP-Sa22rs_=3GGj|y6lBwKi^KmD#up-J=<=PtY~9xO zhV7IuM{fQ5TQWJfItD)|)zpdG^S>u8Lt*jN0u7_UA-n@e5HWtd^^DS{7F1ymMlA4vNjZ;iky z54%v~NF?3%ooYrP=K{6+}(j)?o3 zR~cJE4#bzlF1@!Gw_TQuv>RQyBHz7J+P-n8cQ<(%udbN=^zjfg``I2ixh&1}r<8tu6ZaexBa5J3#bmidVMTzVSh(s6RDiK~4_s`4$gbPAXIbLM0Y3GP4hK`PGRzyYsb+pjhH zX(>QSC6D|02L)kuR0;p<>s3oD%>e~$9MWP z*BaG?ZedPG$QslE#B*@0plyT#&0}u3iBF$`rp83`jcWEoq|PCegNLY8dPDXFo23aQR z8qshlu6Bp8=@zAwx2${fK!=T-UoOY|*rWO-gD$7ZHeaz)lk#K_8qXb%0)ntNbf(QixzVZav`3ZeCs!N-DKBu!Wx{r9;mz zehq*=&i()fX(yl$wzoH|xmlP70gvgh039Jnfz;o6E6t|nP?RzZez>z|PfTLLP2FH7 z@$LOrbwTJj*-;i}_;nRxJW_8<5yxflZU-h;GRszPJN~YF5JjHtDeMpo@lh*WuE`)c zbm-EwDCE6(`SM=LiMQiKiTRIP5B6>J6JR>T8&Co|MBb#+T@C`DVv4a}_TtfySf@Zi zzNuv~{Xmt@o_{9Veesqh>T{azhWWWdrGUr%Uit|uUxUXsKQ5^7z}y%=bC}#<>R3no zGb+dpanF8yAPtY&8Y?r!;(0t~wpHT+W>Gk<&}lB-kV*V%a$HNP&tsz5`l5GzL>VBz zPHm}9OYA`sYkh9kJT<$ko{8I=pcP1=2mi4b?C)sg| z{H{)=dznt1K7Cd6=-N%PgQebu*y7VO5h>>Wr{k1wc00TemeaUjS+zs!+)ttVdc4ed z&gdle{?2cGt1UM+%VXSCo4tJ{?GTe{y&b<>L&Jj1@0t;jsg?$$_MHFY1pogz9Ogf_ z{ohqo{lDzIq5mAI{~W3Rbe#O}Hz({rcKaW@{g2)L7wlHV2j)1u@!Ewn^Zb*md4gnF z02(Tw8$Iaw`MV&tH!}ZnXvAA#v7u$gqvIbM2P=Sc=zlo?8Nc6pt%-Crm(8QuS?wH?SX>KJbm?DQTvaM6cVYD65Fm+? 
z#PQF5sW6s{)XHlhGS9e?d5C;IV1C#P1cWdpP(v$#Su%Vq?YVfxmoJl;sj?)P@-zxV zSoCvDUJzmvff|kgC7MQzhX#5gCku2o08tkn;cdSDECh#)6|@}5;XxpT?wxo3y!`ry z1{gJv!40GolNcC9;m%rzBO>#~@~tdG+MiOh4-E}Ln~fb*uQN@tk4iHi!I0u*{i*L6 zL~D?cQ89|#hFihQ^WJ(_0Xq9oPG#mO^e0)dkY>8w*rpllExP;T)R&~Gy*9f0VXp3N z=CjC@Qr`WXSWhUy2JAFso*aZBdFN^F@1(>0FnEzU7r$*LV(ju%hq5$>5uwuw#1uZv zuGf+X>$^xqP6H$pL_%$uV@X6e)te;x1&=vlWTouD$7>or8iBTh-{S#l=rfuSmQ+Ec208(1}@uoW; z@9%=)$VM~+EDe}kDA@u1)HZAf0ITmfeZf_j>sXoqqb-V|pFaBl$$fN11|}C2BJbb7 zhhZW#sCjlDTo+8w0pV79)?{l^BQn@@?|>Jyb%3O_)oB}=!0I?)Y%FzuRml4B`H*Kr zFq}nK(sj9MHE(&k(+((n=!K&_iXu`0D?sqt+0g%apMH!WG|9kNSZCI+pm2^3ZhV-? zfC5<#kj9POd;5ga;dGLpH5Y$w4u%Am3I?6xhz2rV@Xh4jWl*@~^cj&?`+>Im2aV(w z$ zpORPamTf>rzU6mx($S+pHvTjqU~4ipLrtUWo%KXyF~^{0v2!Li-LvWbnXKqH?Owhx z*cCf67fL$mZVz2BZuif$3afznetku3;~^%YwH{H~fF^@j1=FXEEVc41!nDRH8HBCc z1oviH!N6={12SBpAf#+~67;-6EPG0iJmVo%hxsteLHJ!7`p}hj2UggK>$yJ+#a(=I z4ECn3A5*E5`8nzGaII^e8B9ljAqV+H`JI`zg-+%k5?kT2FYRzFkarjG(1wA;b1(y!PNXViy04@NPK1U&98aUJ98)7?fVv(u z9`TC6bPw1(aX9U8Qm0VaFhtQB&Lqje^$Oa7hc-j<8X0=&q}m)X*bI@=g~6}xuL4|8 zfk>;s-DJu0ApI(!+61{_*a7&p^Fa(Lj&&13G}l32kfvX%qVE8cd*NssQ6cvQQ7tdH zO(2n7`D;b7r$j|NiNl|?Gj;$Bn?m|OkxxYfr^^fQbQ!RUAp07#>Mi^suddewQ9TT_ z$SGC*2rAn3VG!7n=z-RQ_nMLtj49N(3uB-~>J;S_72EF4uz~Wc#plQC_Wkz`Mjq(E z`2_{_gSZlusRVtPZ#(Yk1|2e^*d7CT!_S||Nj|=ldkhB;2*NjHzVne7v?YjStDm3L zH@>I}GmAy+$DeC?I8OHzEJ1JHSx`;GQGo8nO_|khI$ir0%xCJD3WedoZ*TPPVAYb3 z9D-l^GgCtY@3(id{v#vzV0!TrfxuK~k^^A8;4>EpLj(N$z~@8}?tnLieJp`clTDmn z1ARCf8(RZ10H;A*d545#&3m?x&<_*oPJte6!~PI7!?-~+)+5v*5|SzESct$tS)T`Y zEHaGqgJF@R7E*@r^pmN`eEc97$_%FqX|VBdJjg7e8Dj>mHvlMaA0inb`wnxIpozGz z*u{a&6NE4<(w;p%Np2sZ1z>w6zia|?0mjY1495orU#c;M3riz4pF=6I(nL46DHn6~xNI5LSGd)~GOf+8#+&VE8=>3q*Tcur_p zFoc_p8Vp1N{>s+Nq!mN~6Pt@%a`wwW6C!A@{dgpgLObl0bJ%E)6)o`C13>H5^ME*h zwnUNvwX^EALkFtMJhrMC7|;kfH_UtduzuX?8>obl#;H87%02VAJ!Di!cSI1ym z1W_PhY1~sDksr$G-H#E?I*9j;f2fx4BMQ|FcI?12M2$>-7?ILCRM1BLXdm5$=--P2 zaSwm0p)Z1T?tTX~%I1d%nRGuQBNrPYZ}=gwB|-2wJvnsw2}INQv;BMOzi`uscHCG< 
zFzl&H?$x}6?V5~S0ew-FgiDrGPquMTVjUZ920}IDaQLX~?NqNMFb`i4jPAZ~7s_l3 zYSc$Tl1P8rCxf3i8$dth>Aj-_{SpDfeK?rn1057Eq`kIb<)m%QPGEpg1WkGd+^j>_ zNf*+>c5QIc=ZnfIAtwX`R3AE!ke+t{gF$vAZCn_N+ijBvZ7rkF`0FZC0UT;QWAP=< zIN=@}892LyYd4|a4F>adDq@ zC^GcrOMd7o1BkZ(vb>DqMR}Ok1gg_m95RUXk$xbIL-WJcdcrJw185?oJ#oTxtAZ31 zzEz6s25>Um39q2J=S{k?5Az;S2<~3`?)%gk>6d$4b@<2jT(SyCh4xn%A_Rx%w2iRK z7nopl0S4FErP=A&+S(%hh7q#4OVE)K;hQakN$`0#P&v_Gp}xY7y&p-itf7@5#M2R`9mH8bTsy%@rWo1H~5I8%+BMop|pq8`f z%He`Gcl>J*dW#i@?>;uf4gdTBV$Icpn1p>M@QedeYo`n^!eOcFInlB@QWqvrPu6J$ zc`5X%v>D7oSb=fOFPT-m+ zY-yhl?yv=8ufh{P%AW+?rU8d7jETCK*DN^=o%d#2udYK=cFzA`@6DsJUc<1_=0Srb zLjx6+dCDw9LJCR5o4GQRc`A~SjA@i9ghYmy%=1v9gv?TjLZ-|_gme9B*KU97{Bh1% z-#KTkbH26qUv2OE{)XqdpZmV<>%Oj2Geu&fSvmc64v4j_pLac!cWLPWyD)@DpIo&P z1ssxSoHz{%scpK1IDKtJSiJ#}U?UeXm-v=~*&y65Mtcs$Em^jwpOYE-atCQzcxs=} zKexzW8(%mM613#(PBoXeF5t;$H7NBx1j`=-bnglAT1F+DfA&K&-cy~3OOoRi)Hq5u zm-Li?4nmwJ$X%hEYCgR45%mHrA3lIbj3MeAT0$7jD(}-8hRH1V_bQY_{<+Ar1+6}u zxj8yFHz-md926Lc#77)ed_XuEHyTe7uEdviq)oI1y^B4`dm2eE%FZK8;=RbBi0M+xkfk4eQerRo zsFxSm?}kDFy1#_wfJIn|5;0(7&!W5$lM@A-vPV(bozGd*&*UKe&Z_pxkMXL(qj(gQ zm92WQ@)f6jYFGVP6Eql^|IvL+6%(?N91;1u9`VvkT`6*8m^$o))Wo72N*RnU{V`qL zTI}zxyynVf6-r^QsH(!aP1`4ip6o@;GE%sS9FuE12O`FHT^)9<$M4OF;lsWyVs(hw zH3hth7aa4A`^s4CTGUCa6H+qQG1!9^_eOI5LjTHKRuQk0{DYsg7Yr=hT3j))oeF8~ zPnh@JaK&k^3fX~qWUfeku20?m6kXYb?dA73U?$O!vSU9XY}$G=bk&l`!;#V>qkfoT zFtL0?OHApCTFG9U9<+?~<~UaKc&fidBy|6rJhrRX$&`2kWT-QlBs_~0pQ|J&M-n2^ z62qQ6_t(iIzmwU$60=o_w=y4tw7&DQ3HysaFG|TXDQEaB*iD~ZTi?1O9|>Y@tFJFH z*o;ATW;G;|wK`=Im@Rsj^He##X0wMgUwff*TG(sOf1dJc+xWF9UHP&7N^Zz*jk!%U z6Bb)r>Mt#!O)3mcgUA$Z?Srwwn3*$x<{@^iL!Whyu@X9%v!4R5gebLrLg?gEd``W6 zd)T*AdR1X!j!BwSYiODg$7&4mm|oEGUO46QlfeCG5IU==4!O- zh$uf5)i?$c!&2L05X`>hQBh%1`yvKU8;f&K^!{wpzYgJP^SagE1F>Pr3~p}Sm^W?S zaTtTfK_Ym_r@t{iK$Ge^atNQr(MmjZG`g$98@V33+D|KC+T5#j_SXlCMz5@rEyiYDmKhnnxnF-JJbUOW)cY?LDQANborIqW%YOhV z@jFuPS9EtzO3TVZ^uoGe;6R6Kr^`?(G3|<(W+P&`VO$c5>4f0AhPfN+Wi^SA_(1?` z*YZynLZSM8qIz=cN=os1K#j6rJ*pG$J=^TfE?3`E33WmffzAkrdIg_F&b 
zNGyXpkG@yS|C;d{b)%(Ij{B)?)&>2980s7^mz$UvU%HdHrOt8Ga6r?HQmMFx-eo6; zWj2}cAvXcdsXB1+GI3b#EvhNehyV(5F`_tD_^>oMTQML?IUD7Bl8$@Yg(5vw0}ywM`)BA>&nVvSP+@TLcI*- zz^p4i z35Lvkl2MDQgDF49tLsuD9y*6iI+p*q6q2Cn``jz*4A1$q*94L`pa- zL3a2i4j+lGrMr#o_Bc0cbb=IVNYaCM5u6TA<@(0OVcLf=azv4Lo0dyDJv4{S=aG?sOXtiw~YW2d&A%nnHJtw$`C@~!}#+(xGIH16%)}g>YK3X zbKpMG1=c9MX9;ty*fL#WR9mfuRFCve0&LhWvoJ>Gsk|0Qv3BtYye8E5YahqwHFhL2 znb(B!?G(CjY^K>A%LvD=)!5R-Sx2|seWx!U_V8_hvaO1oSk;2@r|$zF|UEA6=xp}n}WbZ z#CrJ{TN=X!!M-eaCna^-%Ncw%c^NxYg*~%XNngR>JvklVRKVcbC#v-@R&oC0UTLI% zDHIFwaTrEh;~pVo3gKO2F{W6sB{8?mORagIo$O$pEM6b^Ff7NSJtJPzSpmo{2%wP- zuWO;*!&)O#IVjX#g-#QSp$&qBrN`lRP1Z?}Sj*VJgIzI+bt&~A3`u1piLI*_0Z zPaH>-goM(C6Y-e0zO^4n7DiVia9*qm9iLq4&n(eSe z(!5pYR zOgK21bxyzBYOM2$(1&ha4Lh$ezp~5_mn?p2Dk>M9HuZz{?H<4K@E?q%q}yG(S;}ch zNX-s>*s_CEaIioPameqkH zhKLP4(D0F}>E9$pmJQH%9Tv2BYFa!Ut1`5K>hh=KNEBuC=WcT&O}@WsG`|Ou3LD+r z$M#5k#(e)rPJiE*b4zF2Zx2^m{I&UJ3Q zxZ9WKbq4qz5)=JQ8=;JERaURh%neBv2q33E!R&WI}ca$e1J#o`E%JONj|M+mT8^C(xf}2G~m@ADl0z4*QAugYVNn;S* zI&rp_ugrn15GTcL_yic(_7`@D9g54_DsgAc(#DW&JbIULn;?6ooqlpBTW;+DaA|ExGuJ#JEmpSm6eU|%^))JvA^a&( zJ}LAdqBW)A6%%Vvriv=WEDLg%VpqTJ3RS z_a4XAq6XT5gt-&Haj6}v0L{wWzR3fsi=jd zwkZ&xDQXHK08gVIboFmuhE=wJp>L_-Lq|f$J(vs}6|N3QPOq1!T`HJ5ymjs^Grrla zI%bs4`U#-af>EjmT)#53vQgW3`-QT|b|R2bENFX*o}@fXt_p7LZ>ZIxe)M8avkYuql6}_L*{*yw-L>!6lTG}FY~wk2v4rcT-oVsEdUXp@ zoGD)eTJQShUMCm6i;<`NmT;yGGFiwssJ9q6i@1-zMs+D-fYgv3ie=MH_tpb>R+{%& zI#@NGN7zw>ZC3fzPri%1ZsLMm-=P#jcu3KDT@x?A_Cj;w?d-R0w(-{8uel3r-f%#w2FTDL$$a<5HX8?T z{ZcaBBOks@rovyirw4+e^DIh{b6rti_eBO^R25EX<J~ zkQv>7*z$j31wCI#NNBe2nZu^(bjHi}8yG&p)rfEC*ELJ0MnB{KNdB?yuo+sYQBvXv(GgO}lZ#?M9 z*Hn2_VPd69`PZv13F6R%pnd<+suYzNPf2Ee=L1u3`EV+ab?R{IlB?9m9%#aQD(M-Y zJsz@SOsk7e-e7BLMD|hA1gh_Y)IoL!$Wv@L84d}}iU%_uPnDWVa}5TbTK$d{z^{Ix zQ);&2Q6SkSd)V2l=k{)u7<% z`i>8d$T|CAOB{?i_+mPbu>KU}KvWPLey!Fh$D zu9ac5Ll;;?uxy_JUei`9AkefMK|wjjO=#n-3mIYdG#RryI;zA@93AcaOtFOTi$!bR zPExs)4^P36ur`A(oa&5y0=o%CC}TNUS%`boYRHRHlQ5e6ltO-L9o-y78ORkw4KKs* 
zKqr3aRBesX+FwQcn5)i!q&q}sJmz_s@iNXW9!Tm}wjOMLLZ}Bi6~WKp`46wm?G3@$ zQlO2;b)VTV(f3BmtZ^TINEDJ~#My0JTFqLcxFNq3K>{SUdB?o=v%sv*}ykVC8)&k|=H8hh&kMyR6+cQck_0HlXUGBY*Pw@=jBja0S( z4>0?(f^sY7rG=>n+97aHpj$lpl9@78A=&K znx|&<)L_eNCBuJj)~~J`@?Yg;ZlN(H<_)pl$|1G>Q@g}3+8}%qqC?Y5p$<-qC?mu2 zYwU!{vc-hTnuW*aMEU0qySs5LJXV`K8+Hc%2!wwQ0KpU^v9D-Bq`hgbiHtq;{o1uv zljZF<3BQv1(%q@tjllx;36ETk7X`JJy1Vm-!1H+zCo&$25?WChFyT2HGhK)&<Dn4NRKJcd{A~IT3+m;da%WqX#=cm zB%0&r+#jH_Z+jjw5KF>BWcpVArFB{3DFo7)2O%7eC_E1-A^M@2{u zj1#!8zbQey@X6dwB6h7N)$IH5BnAF+1iGFqz}bE=T<%w-%151vU|*2D{=@tWVmw`S|E9(NiB;Cr0cIiJK{Uu9J&Gy&8|6|OPgWCNM3XD;Z+?rU*P_@o1bw-F@KKECwtc23GwUZy zOB*eGJr?RN%aaOOzrj__yLuf)Bey&T=vA|ed+c&JJeBM9cMiRM8rL4aT!MTWU`|Sq z@5_El1?32#ROq=FpISqR+5t`G@Ho=xFVUfu+5fbM&?1IFBJ~{jMiDIl8cY0(L9PPi zCro`V4~hdR4Eu+96lx<63C~ImR(zuKU)*=3bzIja+7r`2+rWrfNEVBY6(t zNtu%{(G-~NIm({oi%JO#7Im{#*nDTu_69;YQ+I9Jma#4`O0m+jvNOf_T^{|CB3e?) znQ=fXgyV^eMD>{=c*XRLX=k{6wQ79i*Vu5mZZCYWCjmjiAV=-BLm70XST2@f+iDjk zC`NZc%Ga?lzLq3$e_##e!UDI%p1~$cM$x#C;rRGutk%LV%R7T7Bl=@{zyfQ1o@?Ao zRGfVsla59>uuK`64%&1b=69yBU0vokvRoS+Hfs5f;A7}E9dw)9ir!%rgY%iH+Yb8I zY#ygnZB-N*4TF4xb!ZE8NC0UYTdbMNCj?oI9luYsMi(7JzabfId5^B$nUwJnWGKc0 zoygVz%c1y-P4DYNQqBogr#g)EaekTUOUA6tFK`^XUcyIK9>QsDHcaba#0O{VEvh6fIAL90M#^oQOj+SK)A z=PNlU=!yXhVyCc2Ln3x{b}NviZ$PIY&eV{yZr##V+81v?x+6T`pn(ML7m#(ruN<#j zdHxE?g_nZyd!XJXDwl{yF3zkHFCiFqNMvUnm+|pq$I`!LIDG)`EQ#la&{op2gOBD* z(EIG)t2QG&XH9!@x8oqScryZeZ7XNJTkm!T6${THqBjND+&R3}MRb^94%Hx3_9jpV zNX&oD{Q2NoQ(2Tk)1NOUlzHYJ7FoMZB9RBFJqT9l&t5_!&4J}{B>^%fxq0ayXplP6 zX)=&C6>6!we?hoKCY)Qht+OB}eA7}kcX?VszY@og`!`I+5>$aHwP=p+Uq|3^3g z|E~}a{@0KGdRe|4|F`fu{@3IEUyt{H3PI@qUkj=2vpK3Pq%8+Pr%L!I(*&K0{L?So zb*lgAFCzyGdY0Y%Rpdhsw<0^wKOi8$&ktt>@9w^JU~&EM{j>gb0ys!$Xm-i}KgsL* z|M+Wr@FMRW>P(cAl@AVrZU6!=f1fGp4|HI&zcoq`%DV?L0GTY4V){+4yOl{#0`CJ^ zf+O1R@3YL}mIgbC&W`=|rZ{7?k@1ab#gFg}kckwq*MwC6)!l#Sb}uyplYw=y5-{$x z=2ZnVtdj({3BqdwGXbsKvFs5b&4s;&eaqi3-;R|$rGJVIYC!Lm5?J=`B}nc592PwI zD_FEV_{HfMf5r z%x6}380;L|dCy*DjKz2dh`-CO{aL$qOyp)omzVAE4+Xbb@+)iv2pWB?8c16u3*WR; 
zo5N9ya7jv*pe>{Y-M3|bHpM=l&$uudF!(|GYAkJ01n$WjpiBUsK&b_Y-fFP=g$tax zFZkX3Lux#>8R`Y6TTsBJaDj|VKrHt)32dC}E>WmH4-KWq9l>1ZAHw9VT{j3I7MK| z=vJWg$0N*%D$+_n>yZOsPc|l=QbwQS*nhtfe+b!sI?Ge~ZXkg{k+apuKM`LWPD4=I zSN&mn=4~Nqh-mU`Ztm^{`z~hnlaJ9*WL&oD&kFP1V7uQXw7C*>l9FHiQqn^ZF4pqX z5ya_#Z=fPFuaQ{=^eyd3g1LqZ49{PNN3!LgP9fg%NJTIzQQc355|)C5LhCf@vcLa7 z`9<)^^7nuT@p%kOAlQYcCLll&qGbP&No~2vhF}C6R0f8L+8Zk<__sTt+*r2m&tBRh z++Dr=zyE?C`M*eA6%DW@45U$-%dIwKBBQ)`%z*2IXmOe7zwFn#^FKXIz}i4ZX%q{Q zkScB+!we1vZr&3*7jX6fhVplywWIB3N6Si#uoERlN#M?*=W3=ep#cqNba4LK;nG1b zT2P@?-Pj2dYUc^?R|zV7vI!6(I2)b4e0A|0RNrlByQ>Ka_A%yPkIKfESbZJao9ITc z)Y0r;-$Te@z!&h+-!H4-r_TqJ_Xm ziY|_y-_Cj}Ru+_22T3jPfk=9nj6n1uhHYFQe+tNhNpBQ6>hoaGp%Gqf+d3RWe@MSD zeYGp-l{8%}Nj8a-9viNQ9?SP=-#5 zk5?Fgq<|9wOb{-;A`wtCQ`UKLJD|%aRtNpzF$~E-??M|bnsE59Zno06woZy*l!C7M zu#jrO8RtY)fxXP*zkm0ue==HrV?Fs=va*JTJsA9|sbKq5)dJ#0%jwpWrtD%F8S>7Y z#%7fcwvbON1}Z^8%W3Mg84uPEYQXXdA)rzV9lFLq!fGL8#T`$8+m(E>C%aA0{^ClP z*}rMsFUcXRKiXC?;F&QoffIA5-t;Nx0d z%iUBls=+&djgACjHVBG_HZy>etFYolXLn)G{w9sD<0li|JH)%esF466v_qo|cC!HW z-e-QvH+BjS&X;R3qqpDICn`+c!C7M(9FI9RP|U7qxCO;o*g~ zfoRey9R;$MTSn`>pr1Pb+Vr;ZK?DoWMqk&T`*ltE(qHS5SRkMqzJ2?ac`q^W1jS2u zYn0#(5~A`MAQm7_YaI%gxrSL8I9&rBE7N)yVDT<%fH;VBu6{PbH}6-tm);6~pD_SI zM=)F2j9s)ETsn6?BJn8YCV3(N1bp1{G_%fF=b>1m=ZBN&{!{_ia|YCy-`m-HPCqyrw4|?P z-3mmj*)UfJA_9;vGX=UCw0k2`2O4Z4Lq;QB2g;Yl0ciIrC?u&R1PQ9gD+D9-hWU*> z+DQ^K1|2lTz~w6!ESsFu$gO%=Nkz*aY9~OnT`~5@+j7Gos!|$H_oMk)G|^d+vgSe)a%2sK&fDop?~E|c!t&=?&L&3jx8bUdghD!=lvE}`k*6JEN@*MQ+dP5x zyJnk46)?C$TzZVH5gX=$2tI@R&`V4*2`b*+nNLevOUd{FfO3C;sXgI^bLF3`qMDiyuka^;hj4k4I>=b(qVxbGrRUH^0N-zw(W@Bj~J#jAG&D^!>| z$aX(^ehM?2mNhlnf^}%9%fXO9-r9&mFYYM(m&(nu#D83hL$naCg|>!`3O2r~zQU}$Kq0|n#>EQW`dkwl*(76bgP zTR9dUp>wm@nNF+@!szEy0|Ns$N(2q|BYfmOz9(biaD}rxuaU(S!!YoCQv3mWR?5uY ztRV59oxAJg?`StVwt+Qp$}N%l3&KGznnmDv69{eVG4Q&3Ufqq0jTLBt!Z|Sk=*N3T zpU&+wVXEO%AdH4wC)Vi02*Q}_$9A%kJwF+oiYYK0Fd>E1^xqdmL(*EpD-LEo^%*xI zdu{P4^cnf-LScPXJ2=~#a>on@fHBhzB%H8!gaqGGeWV$-5mLzqAMGiERLir2`n-jOkD4m88SR3O( 
zK3f94S5SK;+FnIy$JqNe5t8%x0LEHH&|O38ug#_MO{~S@*UUE)mDj`m6Sf1B;UM0u z^e84+j1Qw-3DX8fsVAogB~#J(B4p?hcT!f1Rb}f1hg74v54yUU#f{C4s6+T|{av0A zelIKJFm>Hgb1jk_IGBjM0YO73Vpq39mLf`)6lO)@-1~B?6{9AnGqDTRf4oE-A^H?U zQ!NU6-1NjU7SV|$rpS(~VRc4xD@UPqYQIfBMmWy=#Gyek;`QOt5ne(y&z~SI=fYp%N4UqNG@Hv%9e9! zL|B3W0@T@@ar&k)#%|R!`vyKMWOe=YP2-$v)y=hv4BJreiBwch4G+KqRwFhVZ3?-e z*&!pB999m_ol!&8Gst0C3f^&6VRnN@>lyc}id56+&n|$Ig2AqLK$Kfb&L3ofCf z<$ zzR#(-Rb+`m5g(;t5j!O?|9-#}Gas26NlwOZqAU-5ZDzr_AdOk1)L=6i%8c4+Lp`yst1r{{xxe?(EG>sLC*_nJh>QvTxn5fZff~ z(kb0Ky&VR5v_pjpsAGuBog&EY%>*M#NK_2dPv}KEu6*p)^bAKdM1-Z)h%_$3=D^>H z7T9V)Kd#iA!lTA(ZUJ|G78$TszFqwE*6cvja+j!BrrOFXnC z+uxi8!G12R7@n^*G=%BIlQ^%nl$+^!y9hf@Tf)9bh7GE7?4W*b((MC3XD|GGI5*Yz zUisE;QgGcoAu*9PQ&2)|0_Q2&XinDQ2W*0l%va+E9n)f;VeH|i=8i3wT?WA<=8aGm zg(KMrbF+q0WMcCij(`MUV@^c@` zWNP>yJ>H^fHQJ}^c5GSzSoj)Aox)AcCj8ckkQ60&Xt6HE{!wQVt1=<5q-2m9%N_a_n|4x!`B_LC^&Pz?;=o(y}ub;=0g3KyDYdjJ=i~>yE_7x60%bYdb&s<(h$A`B&D2(-b-Dbrmdap@L2;8<3Ru5(BHFj~f>Pz0S} zgvWU9&p%z{9Q+@H5|#V}M??LEXJLP5brtHZFDfe+G51MW(aa|mp{Goki{Yk}fE`Dy zXx7DSAACGU?{n*#3#}G1k5@pBb{V&Vb1|#%u>+M^sVdYuFzDCZOibP#c<|FR`Onbs zB_E1GqW*%faO0vUZ5;;g{=#K0$toyRBlMltn6n#`xjp%xN6<@!lm(tCX_dIFqQTrk|1IVHh3%o5}v4m`47eidy%X z>JYtFW9nsPlh9QdUC4u~gP`{9ZU-TIY~-fg5!DYwAw8!Rdeg(c5#?m^=_j#O-u4`1 z6fjYkHpER_{D|&2!Q%zTWANkqObkz*qIrl}cukkXa5!~A1`OkA6Fe$qeedo3#&9;( z)Zd`|)TXXccnP@TQfcwzGV7$dP7$t12bG71+mpXa7+P<+wby@RyW-*d7c^I7tZLr9 z&UX(5#Y>IHU-u_(t7c8ks>#qQm{EHFqqAq%1drLCX4<;~qkWg6C11@%*;}uW5dAJ7 z+(*lyp;WWs3lr(S`GxfjK9M^vRn4Y1_&BcVx$b=PvhOuR>e*p%A#xk4X})Y5Uzh6Z zQ6^R5?Y-J6QGd|)Tv@5qBAv(<-!QMedvu-`^;9=BJaq`m7`%5bG!V1R7r;y&fA1-+ zn)uLSZ`+EabmB>#WVw&WM4e{>if;>h(w^{r9r5BKoG| zVHv$hoQ;i)jB4ywujGW^n~M_FN)-_Pe_S6cExUTQpnPe?wqy(gWytz!deSk~tTSdI zEv((VK7h1FcAq$9fJtcJ`XFVoORcZ#T3aPgMt!k7ls)_*)3T58==~_mu}sC24ByjD zk|`yVG?QCXMD1k^?szeE>&xz0@9}BegKB&^)oj?qpfA@)R$YsGI%o3u?wC&S#kjzW z7XsP#iyRpLx_=bZ((S1_pFHyO^Wmh$mprWroZV9uJ+w$bdl+K|1qVc#>phgo@%Dt zma-*=`MJ|SQsRz=qy<&fitmcx%M4<| z^!Tbf6};{DMdBwWQeB^&3%n#9(BE&WxqnI2!@~nP@cj8X6K87WJy*u7!j=xsY(44X 
z^L6*3bFxT|R;*ajLtNc)8ylPL_S#%q>;z{ssh*lu*IzU`w(EKSx{aTT+D^O-jt`zr z-~Hx_y|n03;48xzAG>F*^R1U2>?L{JK5iqIYLc*C=-i&JSLQ9-D-*W{#}+v}e>2Gj`oIR897!>i?XMt51Prw0Y2ykGdR zm`NSF9eTR=#|uR+PR?Dsb|ut>@1zgjHXeR|qrqDhvacR1>G~#ocGEqM>kYSYn=MHs zADv?5u(A0iCid@Lev@49v!3V8=FKOZlriYE*}V5TJ^MY5Si0)qb9XiK6%-Zk=B|5X zt3swfDsShhBii#~-;993$fjudbL(UiSoLLDHZEM=c)hagT->v*YNFiq1{-h#-gZWB z4K>I`#l?pl2eS|@XJ;>JGVM$FbqCw+)bYYt%K2PQe%o;J^qQ)D?)CW`N2^A)o#hhD z@erP0x09PXl7H~R!TlsNGc%;=0A*LLQAWLG^|6Rec*`B;hdO+LK|Sm8?NDk|cjw;X z*ryOPRdhwzS}(-B#X@pZK=)_bz*FA;e9z(wXY#Y>&qFvh&l?!5d|hz)dq&FE^K-xM z>MDGg^ANMX+p0`ybE|n!@m-!0*?8;5`){%drq)LED}P_jVuvR!E(anPCaIe_lmx>0ORxrN}q;AFam( zvytBxY9G57ykh6C+tJ*AxY74mih4qnMfs5{Cy(S^*kd3bqeE4!Lf#nR&PCe8#s=#g zV<#sklxKJLA5q#~O@sYbcRQKRnuD}%&5J&R17`!jN?yEUvgTkVbu~@HhTiJ*SN=Nj z@$u+*ZSW$d=@D(%WQb+NcQW7qxcXwoxt*0SpN-z{&S}-S&Uc!XB+GM;3gY~=GtIGK>q{{M+g^5lJ$W1%4>Xg{&}h=XnQF>6kTenu1*4!HzB$A02bjn zhYImtc24Q-^zo+kd1f)syhkNAbjM%Y{&S1{T%o#T%yWZf_W_>k5U0^(st|}F)>UYN z&N0R?{`R>zXOBt-rg_Vf7IQ&b>Y{Fsk3Ww?ov@yd`;b#Cd-zU zdoWP?NIsKGjLvo;&5-1*tSp?whskKi?f!FLJsb0@zdDm|iNt4gK77Ml%pm0)X7}RX z8`JI~dZwXnge3K!4~3@cyx<5fo~O*o$`T*26;xx`{-xvh#>$W*!;txU%oJuLs((L< ziU++(34@17`QIZsDlh)NsCFh2sx9X0jjgHLSoclWh;@I*zK8v8XTHB0d*RHWg^{J> zx5iD?GzRD!a9ymn{OgY*F5FHAt=@nYieNJL z*YTbB`2kTGGHkiVc=_$Tsi>Z)IDMVC?d28MRy$u*_wV-;ABAATdDc%0F`QKA!+*bv zXIt;3c3O{qzB5``Z1TzP-6r~f-ylAMu}wx9GZY-y7-qjVhO`~c_&2-NnH2uVEkC5l zl^^l`5q0g_DKdTcy5I3IK*yHN=??$T$c2Y0ar0Qpyq*`+3x6pJ|4QA|H5(0X{a(*G zFI&F0XYM7jC_lJ`Er898Ezn-w(tYb~Qj7ULPTd204u<``0a|<61qB@;D{_m2l9KYz zyF-AiCdC~2?z24XR`Qr^uh-R=2YWwooKEs=C|~~UKQA7L#w`wO>;Js>?~?0nIaIUi zTsNKWTV;2Zx033w6)Ap^x2O8eo0I=OaXB)(_{%^YxqWQb66;O4Jj@!V$Li$i)FC&@DZE`B&kVNNB#pulbymDt;DT`hU_QTBT# z+#MD4+r`dLy`MQB>df%_h6;2b{SKG+1Yk($mi~FMM4VJ*rHm}P$~W<%;z?d=cJR54 zH2;3;oDTFrN>KF!5%}|=SKIBp{eJ#l0;}_glt14=x4IP{THjUkUZ?oI8gz*^+lLPy z2x#Qqy?uz32Z%^Xl^L9T!HuDZZ`MWk`(RU>B%=y8f4(8ZpPxN*29@QW( zboBI4JlB*>uIst_dtcD@vU70_L%V=p9@gFW{+tfj8iUjBX9GEPv%himD#Qs`^M^MI 
zUy1m(o%;TsIQ6QJp66BNvV{0V_xw(69P7S?FcDwkwI(6e7b}{wu^R2q{%o|GG-p9Z7GHBA1Wz&;Q*rOGZ<@sq1+npQgQZ!nd zo^1{OG-%q=)TepzS@*3xmc6zElyvbbBX3xub9 z9+R}Y5>5MYSx!#QFLjX|$rUxfBFqz=cvfcyks!W<)5-ZRJWMG&B%hke$OoD=rWiGS zNe$02&32oK5uWl~QLCSEx+PC8xw2NPCOnruc*%N8TG5jD=WlU`cQ5UUjF$YhqZ2g? zt)4U*S+qQHo~~m)U2#C@$~ZBO?3M0Tz8rOx+`F32D^Y9E90&>`{C^7aS0ahJv$OeOH}uyCG6qEUAVBbHFnQJ5wE-U1W@ZDuz59~~ zefVJLeM-vyOJK@Yey^X$hC$EZ;$(Vm2#4vbfdFCNqt^rJQ)ecl5#ok@JOI$jyOx_U zSCuG}yQJoW9^~Y{VUGAxEoj}aB(OW^+`ctO50th&wEbAbJ-tI9zPm5b*RL)iMR%$U86keWZQ#P`?gLB&+rK%{VNcdvPS2Llgv z-GadcCUJ2cD_5Fb7C`GVBDKiq;_l2paY8{s0dr>Q>q}3?#KdHTZ;tV7ZEZy$1hRJH z=FO0u&2BN~{Q+yY(9^U2et|v9@-w`2@(VlN@2}7IKc?z350SC#;raoY+P))c8!nji zEym^BYoAkEKiFcuZA;jLhJbVq5xwjk*@lOv>BTP(dVXPk>}wd09AtBl^4lrfn%dhbKcfz&T*37Eo_(6i zQ#%!<>WH-{B<;R=?x$Bl!aKcFuzVAXNnf($qL#+>$gbrZ86_1u8b8*3WE57o$LUrj zb!?J~x?-U1#QRfdY3tG^{8$=UioV!FNB-euaH}lQ@PvezdcNb#@&@P12dnAZ!cS7Y?>!7XU8Gh;zWD9o`ahW)H;Iahis)sa^#OJhXR+XuCr`i~ zc+Tr!W0U7NMCg>+9EwYbX~zOce**XuLbDEbs*&6cV8Z2VJ_)>W(Q~pK;_1N5_SrYYf52!Yo8l71r>*n${H!1@tioNqV%LOsV(UGjfjrCnIfCp zv3U|s&q4-DoxEjS=OwsLC2bShwCmvcfh2vKl}b^PI~(_Z%UtRo&=;~wR!tQfR93Eg zt+9}CWim)#*iycAv1EGpN$WEzrk)eWk8##m4qs1Qd(x$g{AEje8f~h&{(0^*Z-*;8 zOtg;puDx2VnWk+|d*yvGsZ+!9eYRO!j7jwJ*BYtC^PHS z5pN1x+fd(w@|Q1PMoq}O5gXt}n-98e-A;Ja5Lq$WklpeXz;bmp#g~=2<%KF)Pt(&w zMQmeDmrc)snDBLYI5jo(>H5PhyYxNZqo2No>l4hNG~{{_Na<*ne;XUGN-7n7wso|# zlqZ>|e5DA6X;CC!u(W)!xEgc57%7<=AHO~9JMgR4)6RcZ_|_}oG~LdD4)?JkjfBHY zYWL*d=xpno-NeAR2HiC?pZ7N+gxsmP-mqK~@ve+_7;atZI730RyLerdr0vcNopD;E zEkRS9oFyrRgB5{oPy6R-o+`A&y>p$tml(KYw`RoOXZh&FI3>H@k?H+kaFVR5V%Phw z!XbBQVWj(tmt^Lf$)ycurio|r*`ro0RPX!utl$V;U$N3^lBd-p@6zPsyLYLTp13b9 z=FPviPD=jt{5H2<5$ld+;_Usj@1Vs@>Fj+~It%#>+m3xKEYPx}x28I^)^z>wsc6TT z6|U4K>!00*%JsF>Wd{$;QK01ib%J?Dqn@;^prSu@*zkO$%XH*}2gOz&-8;fPP=XloNL`<^ z9Ui5KxBJ|jW^+it~Y`ymbv*<9Loaa&`5D>GQ}k{K)C$`QSzywP!+{iORD6pg3FVq5?C-y9~nZ zm(FT#DAiuf7>y3U`7ZXfMYTSUhR(E~MylBK<_97hyq?8*x}EEiy6HPGcl>0-P+@Pf zU{WQ$*q%`s}?}^XVO?I7eILrTNl#Sa-724d$CqWND6HjH9M%OOdnqU|F9yaJT+# 
zp2N`c%?y%Smp>`2V%kSrd3a{%%1K+&!ZO49WqJ-$dtxt$wzPW*TC|-g4cr^Vp?t`} z!C|843Q1gTdZwJoqD@`*dy>`-t&2ajycq5~Hg+ck2M?yq>A9^j{MPC7ee~aojN@vM zO%%j9jm} z^c!2|Z#`tbyfFf$ZFgTEOLE!@`(!=c>HVJwjtU+B6vIJTLF?GG+}zw0_TVkNbaZsP zEYQf$m|}lRs5sVBSyxeS-HOxr;LqyH{yxVl-I2A|NGXrzu8fiFOB?1ctkvexnRKih z@=T4M8uTh03oA?Pahcv3+n6yuZxc0;N!R6sMWlR5=fxp|jk%wgOAoL9@nc@2c5}n{ zrvTHZyhj(V--wpqXYRMlb0NCGrdGjiq_gW&oPY^?Y`nXQ(&}lilvBxdP3adK=nh54 zEV9R?;17NB&}6{Nr*B>)Pvx9&)}sfTFbr zV0ZM>8Q!7^oRGjWnAh>>_+9YApE_djJ@Jf|7MK$n4pfI9K&HU| z1i)5weS%v^9!aR`KG}Sp*WFQ1;+)XSw==TcbJb+3-6SbfTt`*0;O5#H0c zUa$GG@h6gUQ6^fZhn>GIzY!Sg193N@HHz=QG4SZkc;9-|x#MnhZ-dHQPC+pIxorccD63s`QI_DTQ||C>w)mHxmxt?wL?9NM?rpCR? z!?Q+TnsHbR8FOh#Z3yswZYo%IsN*Sh>9)+*F?Shbmp?GU-JZ(I$Z)cAs=PrdHNvEp*zzI3x)3^zU(s9tKN{S`2xO>chhpB?YcNIHGp+xyeZa>u7;=)Tx_&D}_Q z3#2@M%7C`8kuz`{>y91z$;uL0 zY(sjB$4!fj-#)8Yvzfu8AUk{7Z<2Ja`y8dr^_!{;Y$Wb^@iJ-B*z@vs(KP#yMG;FM z-mK;d{Bv&V+O58G=MF^bz4!zj`c*VtB+?_HU$g86ltLdY<|WF!QSr%g7-~IXU%+s! z52~WEWYHTcSGh4Y^Wp#p2S;s{j#k%GLF&(BSOpjJ(UdSLW8fFpYHgmEFh2(|M)8 ztLv)JZpYF1?^m|XmG~+rsew$i`q9wGOq(e=J^|_Wto*?qM{VswsU=CL@k$1fh`I-) zg`&xQ0zqjjD|i1+a4S!e(eReHuoyxhcX4s49vp(U2%Bz!z?!p=-EDF|w|+iM*qU(= zEZK>f8LWc^s8`E(G&b51wQ$nW7?rMvZ%>z8zJ!o@bj$(-Ldln^NLvk$Z<@=oO4WM) z1l0J823-EU+~eLHyA%y{Dt7$_OV8T-kZa zMlGRkRF~(^1tOYPHzs%40N0USxy=5qy5PlXD=XylIc7~q?>ATN5V6_ge=Avut3g@` z@FY5yADto+bjl+OfRLM*IDZOHs|#v)QZ2fnP zt?oy2Kp|-<<4mlfS!dORz!BN~$`MQ33Zd;~JEyqJ!qZmF_{TPZvZ?&OF}B)IHU_Wt zld>IJ-00R1Y-QHEyzM^AO|RTe1M;d+{@-~zPNVeOV457!{n;FFVNQ>XzM{GyxaC`h zeV?+X8#DbQ6gyj2(w6dPBzJ=sV_KG z&}EH{5kpOy`;C|VuKkWqaf(0Hh;)fp2)V>~JJJ5?S?k>n8oJ6K_v@D+Gs*h8+O__& z@&2vxdX!t#HWXx;zn5g{@bcJWrcC$4vD>LOK{dRrRZo^9Jl8y6jg_2#36rr|^?Ehq zhe5BDm~}?{s1c53fRux=A4aze%$<&J}JznJTQ?GNz86 z<@ua(8ZVmOFJtZ~(S4L0Fn^czg!75V9?K7jT6fn+RP?4FM7(IsPBx#jcXT|lw5!1x zD?ZWSanl|0iHM#gMG#~tH*E^uskgco%p!;0YMM*Yp^`s?;pDyKv3!13>7!-MXnMW)gf8wgy;>-FQ!kH}^V%9Ntr88) zXp-Uu4|73_@8T3!_1AZ*`RQj2c4xfY*DDqE@wxmJ+wb3GZ1+^Y{GEqrhH!{nKa*k& zs$a&Vo6u0)wOJPerVP7x^CYLO^*IA8`*+VD9yl4rnQWeZJmqQ72~dkD*RQ9VMoJGx 
z!FdiVd0E+etT9?V zMLzgAHn7X!!taw5Nc%P;G+0B~l%plN6zA%7?we!G5evx17yI5IYf${gmeFIDmX_-L z@?;$h=`4T%Rp>DAd1OQq@#2EFez|)}N{WrhsfI0#jCu%@Q^cTCtNPUj3-HFzH?9Ib z6K1t9pd<(F=}b_nKM89zj}OM=kMO=){u@YRqDi{xz-yN4@8@Dgg z)GlpFp`}4-prwJ5qM@NuC}|L>&`$0KB`u!N5NR*krAb54kU}XHDU@~^?{Vodp7;0p z{Qi5Nf1VHS`?{~|yw3AGkK_1ii7lK-w-b35d1`V_X)D$!5(Uykb||l>io4w{_ZDs3 zkGh$!Yhy9P?P@pH9eKI=5+-Q{GqHA)Mf;KK$|X71^0co$@TB(3MQK|5xDSigHtHS| z1$8F0PL{&~4#l*rr`hO~k=QmwY%xkX*IDwJmXfx}t4nr4J}5-$cR(g?^dcPUl6*O= zJUn7rPUPLd&#YauE2ag!h+~sit7~c?z0H0k_V(>(#WKz$(4Uv{52ED<6{27{V+Pta-Y8Gh#!mY8^GuZ#Lw8jEv5c6)b^95PF$`BoycqfL6|>A1L%nTYAi5FE>eZ_`80JcUDZ~N(bAf9wtH_@<~5U3_5owrg_w8`$=ZID9M< zIQ42N`{dJZmX8@TiuBB;=!1Nnp^TSMqsnicE?U}I`Qz)2Vb{A9YMX|6o-x*t1P88O zY+juIF;jon>xXB}t#)b0|-HyDu)pWU#~>ET4Lanp8*;%0>*50}D~yI~0v!P+HXPp7mCw@u21Se>!W zw%N%$&stmKLM>x%7W^Uxhlmx;!z{<8RCdjjGG^$-~)w8@XgpF-67w zudl>M`}yrgvh$yPkjSq0bwKo0vH_)Y&EI|TVQ02=>%R8&wVw@z;;I!9n?C7O`9}+x8i}8 zVA@-Mp6^_fMVxnYBD5&v^1_dx5*g^%{*g=1r==uZ z{sM*!J4fESH3$0C>o;$%T)zAp^iqu{GJiPI`JnuE=|au0tFTeX^y)@^96hOZ%_qv?fH$cGyn5(`SS@$Ndwur z=6blg++P+O7#c_!(l}#Gj_{u+$ThoqdvxRde1QiN2Q!xOdO!Qw!Dc|3vgbCV(El6W zV%6jU8x^n6^Agr#8Qme@&_1eXVHpxD*GvGSn17{R zYYRb5X6pZpTG*evHZ2S?5}{nf>1b_z_EoS}_O`!2U-?FLO%kx}f4?P~4+*BB-R?X* z;ys>Ef~mG?{B`{~Agv`MKp+?W4MG@?5Dg(AR`|iIFd*^%^SBUI{v$5QKL8^vJwmg* ztSmXV(YU|2Du6S{KwNrGA@vyYes=8dV}UP$p%q}ptK@0+d#A$a^aj@dXcpKPC4OkS zl;Z!fI|Qx;?C&s9bG4X%3kZmr;agA_U-=N3sWa3VaIH2*jZWsATjVW2!ICPI!S6g{-Vf= zR?44Oh|{X0lL16Jq=-U{oQTbOEcAMY-h7K>g8YxM;HeJym(P*Ly;)@p;Zg4wG-Kg^OG6dnK3w%pYaoM;yUrkf0Px zVI?E!VDg=jwAo-OX$7x|y5LWOp7u2yroWy^)HyOL>apuL^EC;`6bS>l-2c<=7m?fE z{^8bvX2k_5aexDdrUgkI^d~|jf1Mq+LKKUE9e9+TJvKE(IG3L=NRs?B12l|lnuouP zj7SO1F|3bq<`l% zp{*7k-jKOX1HiMNk2mWIOqT+h9)#$n&IE2YXCEnWxvNH5s9MRa?__$^*u*73D&=2S z`Mf+#^s*|!9E@R`0V?iTBxCC`U~dzU3!8o;s3HiU<837`LO+>94#j@Oi&JTDmG@lp zCIAiusO%3^#xMgd90D^74R!y3fO-*r`PDxw14x`hHvd|h&eoM|H0tmjCA;@D`Tm4S zm}dkm^=Hq7JNLTZs7na04}8O?ml4B5gd)LVsdZ;Hdao{?9avq`VYApkF0{)dA@taJ z6HTxN1puIsn>M*AzKo?isQFg 
zZ=ahuGM~SH-A5X>dL`;>wPyqOT63`IgdZIGGVIjvY>;QGmw3T)P{Y<%sNe2wNUv-q zL-)0v&Z2#i;}*4>KPNx^p!D8_-85eHtR1Cn(EfJpt~0oz zbAo_KlqKnrd*R-sh*A54DC?tJ%-cZvPG*JZ}sWB4Mtxw8@k_U#)8;?PQbcR>aNO1;g} z%}Y(*uI9Mf+uNHnSxm;T;y`;>R~PD&fnl=Ym`EGn*VNSHW@HolKKnSii>9U~EiLU% zPfAKk{r*FMo>qL5tQ9aX8WRQE@JM2~sI~*a%g)QIuBvKSEww(TbFAfoC!1gZ**5w` zc06*w7d8JN%?a{GT5JoV-s4=d33F1pF0(2X|Bx^S&)qNcjX7Bu6ckkSJVXLV zGnUqIa&zzSn43W2>9SspkFP|nTb6e;W=yLaHUlmg+CAom)KX_g8{UE-3jk09`?(M6 zw`_3$FYx}1stOfUC|@G2Z=`olaodqIcUyE-xauAq&3Ii5bi~Zaz5qCqK8|)uLlfm} zxmQ&D{#23zmgZsGs!gpcoz?MYM%-WRDuCJXfBYeb4Cv6Gf9Zd0RfOpBUsBak2pKK^ zC1W_9ZpOb~6XcFFV_N>ip6$wo8{jUp$kf1Dm;6O6~Aay_b-?6M) z=Lx|h>`>U@?@q9{xL`GjijEFi6UoKJ7r{6cyY9Ld))&Z;vjg>nXf{lIe-B~`{2+Jf0xGQri3Lhny6u_@Tr5GJ=591B~^RSTBW+*%{tlJsps_hTh{7!rKD~zg* z0eD6FsfN!{O-0K#N3EL$)l=}ksR<~Aqy>pX#qzn zVw#%2+8)6*$bw?s6Aawj*Q{KhD){3`V*z>>fU$hw<7}n-T2JR6CWfUsK{gly85*!c z)HO95!e8EqjWvS+6OiEG#vhKtBgc-8jh%c~Bx6oNaFQ`#0W2XX0X%aMq=j`_ob0eb zk?To$dTHqb4v1Fn2Ek&i{@J5gytQlB0^t=oef19V)Q=6(4BL7#%NJpe8LdvB41n3o zIrt$rsp?a1(DH=#0+m$3`7b8Ay1I`G3^6oNTYJr=4>%SKe2Np3lclI9q1e>x3=HO! zz8|+-ALKb+u17gQSa|RGCc#e}%*@|s#;lP8gRR#m8UM4HxDgv+?NBqe$w1K=l;B&e2M$^%Mn10C7W}kNKEhh++``EF#*%5z9XD zoT~(_mht8SI%phSfi6A4C`3+(8M-yVqobJ&NSx6*`xkL4(BXi-s{Uj9POy8~#16ke z8xN*r-&MIV3Z9p*Uah4A*~kPE&vD>a^rhYI0Yy6;2&!muW z5x$HVKLVe=v@USRf(i}?)%oIW48;ZQI8?TD>RhfouQ(+xr zrJ&~_t?IQwAtn24-Z#vYOv?lp8E{xX>bs#6rYA=mSRV&($}QRLX_m*2^EmE|-$j7I0?yN? 
z9lL%2*fB3WG#nmw%wv=Y0g~&Ra+E=yfbcZp!Gj0+`55z-zW|erK}YyFZUx{&oYT#l zHkB=K2E^>Qva))?OL?T+b64A**yEH-#u${~?ED=o9DhJ0y)H5pHUVMlJCKb-Lm3(w zd3(OU+5h^0nHkR=%hRVvVYU|*7QlMFa@+DFdZ=zWsNmMJRJW327{(I;GE^cVL(wxj zkxjjg*Q6U~X-N&zZ_efZVXg?YVwhr1R^MC~v+`r(O}|+UN4H%uY61cRd{8YXC@5&3 z?Yg+ObsR;irAg{(rz%9Lz35Ioh1T~E*9+@mPmMPV3X)qLLiw}A(F-q@P_@a*`t&AW zVvtrY++K}JiUF3cghSdk?6Xs#9nKk&%*FDh1t;i35etQ`+Ty~GGZ%4YV7Ga(7Y3if z97kiyS$jL1;t3lY8$cMHoI;C%sJbnEG92sh)=2?kFc(nj>G4|fz!8H%s}3o~OF#hw zUGf3Medu-N9HD^ZTS-X?8ep5&WZ{*;en!55i{X$ZBO`;ss!YvKPyd2{M$~6@0(-!n ziIuetk{*vtPu}Rvogx#X{VM31u73xW9N0k+C&T}-*oX}P4~}7=_RxSJ&K#)PAa6YB zTPt>068hyBb@eLa2)Bb8G5gQF`27B_(eNh1qu2n79&>3XkM?zM_9x(mR_D=rDnR44 z@@URA+z8U>>^B52xT#5p70ArqBOuB9?ZW_~>bg2%2%lTsFn{3*n^RFy(f5*Y|Ne}J ze4N!AucNF0O}lgy3AlLcNsk^qN=y_ONyhy7uxzX@T!=k}X3fm@Ug0Y$E^avg`M^y8 z4+I<&=?QL?v!$h_o15F=!-sWsU%=#{YXUT=9uF~K8D+{5WXRY@5azIeA_=IaprB`> z1E1{lwqOjl{kz`so!s!nokYV0^YPuqNdOScZ4Qroe?N;6{Vbdejsm5MFRMz}IZrK- z4I8X5zZQ@#Hl2;$>3|+Jtet}&1u7r>B@BlGtG7f!kq8$^50NPL_pFj&ZYcIcQES+& z$M>h>kD*2oky8z;0017p5#$2^FejG8DhhE#9W$;q%n(_n4e!MIfP*2SR{!= zs;*We{4_ouBl>?4i{QdPgPd-D^hc(@^-NDs8=*g?M%=syj2 z2m|=3ukT>8ZaikxH`q}?x6Bmj2SlnWFM~#$d0mvF1Dgxm-(&dI7QMP>K)gCX3 z4}GzF@;i1Ub|-3rhVnDak_V5eu2)S{Gg|l%MZky8pXd5&XpKE0!Ixwe(p}-;{IRJN z=u*g67OFg)2gK2!D?+pbhw#{t0V|CFhLaxb_5)?d-a)t~q!p zfP}*Gi4X#RhX9O-xeR15i9*E!f%#7w4cf;fuI7Mv32-4vj00AWasFNbNY zZ!tw!SOyP?9g3hqc7ggMV6>Q(o|Tu^7%kBQR6Ah{KsDS$Pv~h$@<>|Ot}(tqJ@^>Q zk2)l`X)v28gy^LTdp8OT3&Y7&8M?Yo%X(Re3NXr#1sW1F&V7Ax5d@Il(Y6v25~il6 z+GGHa&&UJ!S@^}^#rO9Wen>MI<8jtC4BIgc)H=u*LV7T-{92j5We^9gP_sOGR1b;8 zr`We~UrkdDx7eq^>!BHecdv~me_ihy?(N#z_FfB<(P9q+gM+!{odvi1z~X$KuZl;Y z3RuX?IK1(kiB~(3s$v2y;NrU}567mRSg|;Tg9Oq@ugzR56E(ko$&R>P7~*o{0!5gz zi=@P}=R?X3XkW#XkwesccTUP6#IKUAJu#U*RIHT7e4b!XV~jfh`{28^xGI%< zhw?>CkG>N92@hqIqE9VrZ*LE)-#>T`QLd%9vLAZ26ciNoDJDhL(avKB%N^e-7<;hg zbv}0BKH&o$kBdM@q!2pn=U+}=;qweLP$klOb?2=(xCpbV$uXI7*Yn8x7}-nA7hqLE zU&6)n#^JMD<8>r!G&D66Iq!c&{Fk`Wy~K~xUP~W0;TFo|cCc#K!R2P2w 
z_#qgaZ{Fzs;5GGa%!hC96A+`N4Td|xno}__NI>+DizovvoTMt)WmQepHfEL4THn25 zE=Xbk$!Xb9PE)uNBq;JXff}})T8YJCCcvSX{JWoZ^Zr{Av9V&GP3*DW&=j?zR{7M4 zA-6%rT+*pam;L>rr9tDsCF>YF_1TWIPvSOmlh$zYa|F?2JGkN8#S6$l0ffeB$;EPv zkh{Eq)=+vWQl~nN(Mr*#`YNqC*q9?`W}{ti64YtaTXV+_URs=YKXvLgK zp0i2&44nt&5VD+}#x_AT@#M&k9Xm=J5)m;$UV`f?0{ksnoTC#H)gi&#N2QGq;yeSe zf9(PmBCo9CEn@ICS~ih9m{nsBf?ST^IE!uHc_Lxk=+79QuIwUhGXPIX;R**HSd6&b zq{H{=eVjeLytmM?5rQh~B%(HqxqbZj@oRD^MCV7|L|fL&A*TB@+}r!|{(f$KX^+(~ z?AVGiikPUqUMtfXp(2xj1{AnzU%n)Ul*lojpp$MY7=@vG9S1|F1q#?qt0hd^B@Yk$ zyYS-np}|2!j@vswO=5ngMp|#iMx7y4Q8>B0@ot}mb7fPpeMZT=Q0yVJ1)X&B^7DJj zy~@Q(g9a~DFH;SX=FptD0$rMwnR#y4%RzDx`$VJ;M|w?iu5;z_0GqO1#Tjz}M1wF< z&7f4foMNOODWq+{yP!x(U3qjbKhf$c0rlEQ1Ehzhp#?J^ADBDZ+ALO1qoqK}1XHhm z-Pwtxaqsqx{u~?OxU8vZXhaTYtv`_nn~%z)?lyTs!{o7>0KzcVFORB)cOef@@q_p2 zF7q&rYJ*&%M1s?tF(XUv;fuNr1ucp4pD;H47`(!s$KfE;iAU-Ir1s*4u70&ewdD>z zkBRHH0+57)Zn207p+q*RmJH@^$yY^tbG8E$#PB}p5U=>&7BwR`1Qw9bwl#n^{+ z;RmT7?q)hPh(QTDc&|HG3DI*&Q#nZbA@tEZFF@=z1b7^kenuaFA_@u`OgW^>J`*Ce zvLwKIt#Sk4IpEijZpb|?hFJp-BWF$|8Ju@!2NZ{3 zGs;}&*zh?OCRTHvmq|lV%MLm^HG*_Q%)vA(d34gWC%3bT=nE~3=uq(s7Vpv0LaB|z z{A9fLtpT!lajRLyx8nF20bD%Z6tG;&fynW52@a&s@;Z>^{^w>s_MJoi~&E(x#fd^H=gbV z^MeV}xUA$5%%2L52V&!dZ#b04_j+7QkQ1*Kp1mrFj-Qeb9%0Y+mB&9Lqcy)8%qkr2 z=`jvI6SObm&Er)slSowhNN~V4_oq2=18P+-*C>aVU?%Q6Krz8lw=z#qLx;d$ z4hy0+o+P{lGoZhI`a~*2njF1R24Q{QL^&ds508avYiq%tXIJOwz*O3{R3nK-*33kU za?hd7?5d<}6j$VLAV&+7|BhMc5lQFJO5bBvMFWEU*^kF2H^$`08N4>ZG?hkoY)MEx zDX+@1eYwH-;CO!>i>$0{8}KKg4^6PmK)A; z7e05Z5()P1AB3b(6RN!TS7N6thg*n0M@eO3w0Ac$1Gvg~vSgs2M$f ze#J*6Qzi{bXXz*RF3yY|T@L9=JIrj?YuSy4fk`c~-Q`>NE`oyTwinGTr~*j6EBiY9 zk~q6m=7e%bXh&~I$7b@^T}fK(_K$LNi#->L4oj9=R``bTb*k>7B}R=U?~X;xDVmz; zyFdzN1%Nb=mWK9)D?*@zZ$K^`GuYzNCqk03YXdi*=y`3B4})G=(BvF31TZE>C^Zw^ z3t^qA6^vVq4KSidpzPHe6marT(LtJvf}kBnxT#i2o=Lctep)h};TH1z^$H2WVx`A0 zX}hn;G!AtfnZ&l@n~VWr zT|OqTsA{95ROh37MN-R0R_q?VWPacqvPIgx$@MzqDN6%j}rs8)7zlywP~TomtPN65CmoS|M|xhyN7VU 
zIJ+mCRurP?2p!AQROyqar9Psf%`u74q0CXqnu4vwTCOI*f9b4Jj%#v>7q(4BZJLN3kuVIqu|?>EfYE7egg&zA#qEOc3vZ)*%m#&a^QdP$ zX_b$k#Mrx!a2@f!D9_y8dVC+sE$L}#FZ51*QCYT-gb!)L5C;`8G5rKJ>U5V7X=R_C zJ9oxORi4GXy?A5EBid*#n)jcz=)c3A8Ief>7nd5~S-@k}ZUK{nfraMu48>hg7V3UPY@b|Em* zZK(&7h}q+y%(tQ@P$N$jmW|wts7)bd9%^RL%tX~QQ4vj&j@x9{%8=L^p+tmPnZ^hX zQ9ve$45$I(33ptYIsN$VZWStb_J2D@Tj|~kZ`!m;M#iAKSuyJ)N{}d^h+C+T>R@EC t1jLR0|C>j|Dg8g`N)Z?4O!~gGf??Qk>sm6})hqDdUUeO{3{^|t{{bXs{M!Hk literal 0 HcmV?d00001 diff --git a/media/images/cutlass-tile-iteration.png b/media/images/cutlass-tile-iteration.png new file mode 100644 index 0000000000000000000000000000000000000000..552c67126c0b26ccab8d4fe82d6d6adcac210bce GIT binary patch literal 76377 zcmeFZcT|(>_BD!aW5bSYLBNJ62q*|h7g3O|R3Q`<=@2mVuA-vSyL3@{-P91OmEJ>< zBoL4qAOr|Kly8N-&;9Q2e4ab*827I`#&K-M4kWz1?^D*AYtFfzxA)W(nD(>oXJBAp zQoMUxgMnf90t3UY!M!`-C*l$%67atr4jKx#81h(qU^pvE|Jot+AjzG9VJlVfwyc(`(R2c%tCseF-_a8%c6-=aQ%=b4VYAM!m@F^P z^D^|v@49!}vUB$%*%aTq=2sm)KELlA$zx-@qN{r9VTzhsd+wqJhu z?n|IQ90SA6w~QG2BQP*LJ9Zd#VEYvV7svnG3+_9E784WGqyO5Y=lj_-f58+@fH8C5ax^GA*MQ5Dv`c{Q7h z`QnU{OFvCKmZq;6Ytx_cl4VAiqN-}uSgf_y&&nm7H4MznpT{2{lRW1YvAf|D_XITudfcYI2O>M!I@vKV=?B5Pz5e5I11VB!7MO|67FG8qh7S7 zn2U&v3*8MHG)j#q_T=h1wM|`WQHwT{EL%PM^#G^q;PaaGQGuvY=fTgbw8#?Rd>X$6d^joLd9%X0GWy(>F6M;76zq6{3%v|A->Nz0zC4rLF@s0v$MBz|7udH?cW3M7YosbEDZQgl*-{U}u8LfW zU!oB0Vr}N&sisD}4Mvvy1*$^`c}7(@75vvwydS6cN+WN6S0O$E4<&4pW2d5&`@&Ut z{^go2FM|94C zih?d-HYT(4)g;Kp!qz?SVgH11p@Svo-<(J1*KC#xAOBPE$(yX9;Y;Y4m>4tM$)``B znkP8*;O%O>Dn~@b#c?W#6lXH7?mqI<$B)y&&P8S|L7xZ%Fi}gR&eJ~&>Xea0pDnM3 zTxWhETmRzX>)WGs;q12@jnu}TT!i-fnbKQ2I#ChBF!y=-C0R<0sIj())MnH^Fk3rZx)^U5A-e zafH$mDVNDE#VMDG_GDZu{8EQtmRa?9qDNlkd}Bn!i8%jCpRLVPLS_+sh*Gr#aS;~1 zC+6l&8C+;=x_U4EYHo7fDI=yuLzn*44cwRo=MDRZ-`^Y!LyY_Uv3)Ts7VoKCG<~E? 
ztb6q()c3WBo;^E6aA@18li3Un4Si%@EXS~3tq_lliy~N^48w}0T=YOzctw_6!l-IC^YzMT1JqlkZ>+*r4>+ut<}atm zqk8nr6?03bj~PnX-TdVBjY-rZsMp9Nmm^LGL1&^&8zVw@y|?7Gct|N5J|x!mip9tx z)+U*}8f%@`VL|kywk#>o0)$%Idz_a0UE<(_#o0Q!SzeR#O#6E`!O$9(^M*Q#M>lZOt~W*@XB)3*?Ixv338lhsD6L<4z!(D6%If#*#@2)>Hpf zeobCD?R>CgC*%mM)gC50h?{ECt0U^ttKY}&9biJe+RvPa8H*Knn(TrVlE(*0XJrAa zP7iHB_m$Cyj^H{RXxbFD0Wsc|zi@1ElSYZ=)y-3iy1ZbWL$z<>UNx6AM`zZh0;o=A}!MsoMBEMNV{6SERnD-4uv51 z#xX2B9L9j0omZ}uel=O#16ORBCU6AZ?~GWS>?RZWAYC=qjUpQ~(^T&Nus4{A%6Z&{ z;fA39N}l%hfPm+8L7j$%hM_qcZ+HdH88G%M>-x)7P zJY1gXhbNaD4iGZ(*>Fm-lyXiP8QZk8 zbu3wg*pn5gf&Tsu$V;i3DJ>m6dnH6=6zG|`Q&CB&d^8H-Y2@)2{eC52Iztr}kP$;XejAV_Q+=cvx|c2Ka<|aY z(?Vv=_85JS3)|^W;zrY(f2BG`dhi~69~vHhJ~=Qb$a{4(x=m(l{SPMA_dzC1+d1@5 ztKhkR@+EG^_U1J^$SJ)Zrp(`ASueXkHY&<&E$H%py7D!%=0D$tsL36Iz}5J2-*FY< zwLO>;$j*%Br$Yb9#N=mmlK-@jRu-%g?et_x>9vXTSQiypWbShU?u(xMzY@8`wn9*M zOxs4BNi5!LsgDkY0DqDsoVQardGN~lf1v~RZ5&|Iu)a#wP9Oq6VwZ3>D7Lu&MsoIh zG~#mAf=Zj@#YJ9}byj-2F9CPjcBJQe-%d=4Z`4r?p9noLz1+{-XHplWjj>Dfz1f|$ z$Z=g<$t9zBzp!1mxc5mu5MWbxo}omfBZMsDVvA4`|H&`)xdWXr#e@{aa9G1XzJCwk zlregjjZxle7J7Fl1Qiz^F627XU#|E9RZ*85yrUwta-al0td@j3`zqryl6tR(O zrNHDLBYj&%MWxDp@j{!~+Ew@1-ST1iK9KSP2x~+Cw!#^F#+qX^%N;EfROkPcaLyq~ zT(gfxB;ZV+eo_b)!TPml>Y8=e_-x&(G1Wivsxeoo`>fpSyUD@pM+#Zlhr7ja*O}Q} zFp*kywY7Orqd{E(1B4ZX8+-tp#rF@UcXRuw9|!JPb!TZq_JnmG5F_2ZK*Gjdw=8rd zX;huiMN)_VkDGE7xzGHu=xw{${1-;fbs-i>x#txGeuJ`m=eXkpv3~bVu>~e|oIdOK z536V%-8W{^(0S}~@j-{LFZU@bDt5MVDMw{|xU#^THnV#*0g?H_s?2+1Er#?hlq}CtCVzH`lPx8oD`ws*fZLBTEiP{Zh>k3FE zUNd=prPqapDLwGuIEkg|zQ8>xvldx95j!aK)JeZBSOr z2c0H)D(H?bLd+%bP7jXih>P0~-sGc%p3m0<#H>@ImU!z7ddE`;cmI@Bhqi+rcNQy` zEkysWq`4TXS$nDaFL|mSN(G0qdkcujw;Dgnmc=)A`Y@CQ~>~+h|EbZ)4Tm0Gvge?Fp$Q*_ZgD{AoJ`_fEo;hEBWDe*&qT#XM0AONcTtX#6G0MO zfhK?r4fS(20-blFSA``(<}y0VZUJ40tT8n%e4+?Jdw$ENE%XErI!h{E{k%t&Q4McF zR)4x$0*_sT9Pb@F^-lm?Wgv=J!A2d+T)l}*a6WJb4TW5NuHHs6Bc?^Ef5NuI4k0f2Ew~^@??CNigD=e`_PW;vbLQ&_=oi=j zePiz5q&$`)@HATnA@kN8z~Mkvv^buDtR=8QZIcnT?wPJPmO=SZ2!DM4wjK%L6BF}Ng8~D!nfSyUy$__j!b3l9jJTYo 
zM_V8r=j6;UR}yXkCP@3<(qy-Csac|ys_KY2g%_-FtjOW8a7mXd=FvAW8PXPHg#$=w3Gj_E;u z^!#TCi?@2r5z+sM8>S zUNI~$K14D0*(f2sv3U`SH&F{gvF+c#@t|-3;k`4eH4F`SHPbt!(Qtw(_15nH03<%x zYcY^A66>6L4bz2+N&XK*5Y_;Z?f)&5#kV&@Tq=2UX!;P(Td3bm*UtU4qe4b>vg z)p=k5T4RLjyty2|ZnI7${|@axAA`I%To>#^N{#05oRv*5RpmVaapvC&hwZyPtH52_ zCQX}$@$8z@1L{I2@A2A)2fs(ad&R^n>c2bMw+??hEOXuK-j$$ef=-@+-oI!ft)yJN zqFNRFISC1TD1Kz*Nv?&qpK}Nj%F1kD=>Vs7nqXPo<4rwhFyj}{1qB5#R1gO{vDj#S zLmmuJsk*|J4#g$B^zv9iK_QOWE)-W{&B=mEknyP=DZT$merdE+ugEM6(N_{v`sQ^x zi)x6%VFWs(b4)GnQ|{53 zp4>QRbGp-I{q4JVonOR)Xsw1Bbq{78cci`xtVb6z1RLW`a-dp&Ji-5bKVm{_m3 zm-d%f>)MW+E&6eoyHsy1;RPzD1>aV+^t*j~%>@7PB1)%$Q8RZ4ePy;WSH~HBS<|dO zBr>>0qxF0GC z6CIx1s}Cjo+P0#TsFoG+|5t*sf@CvK*351%}u{gS-u)WY{8>Q5w7P~MGg_wn&!4B6uGTvK=RYJ(lKbRk#W9HBHvIFofoxn+S+9*ln&{v)9LkX7|t;MbAnsu&g*E+^#;$vbiW9iF|#n|%^EW4l!aN82ABjH?2 z?rbojH@;;j_dNCZ@s3$U^e@Ot>rnOJ64*p+{^aDegPDtZ3d>fYY9Z`UA`Y|glRn+n zLtkH+CwY}Vv4=~EC?L*Hugm02`mL1<8XR3 zs)7suyn;)DeDdCEQ-7H4*ZweiflJ`)8?%^t*3ciegF)$-No_y}Nw4mKA7qHhE05Pc z^Z%Y3d0`6G4n(KkYdy&-F@l|jSVrgHXPqNgvP!Li^)a#Gpj0g)dlwsB^Y;;4Pjm50<^ys?wVdOtt-akJ6bY!r& z+Spv5c~Xq@C9&T=+_N{sK=bxI6%4k{uhLQQ!7_5~{%v|Z^7P7szdpc#iffHsLyg@1 zfB4`NQ`3H+kHxj<t>Suu42E0MnpFo z9-W+wZ-yM<7gYtlj_mmTSRH%Y7)oD#_R)2)hHcl*?7aC~Dvc|WE~mn$B6W>2hs#L5 z_E0|jhMSAf8f%3zT!V{!4*vWlq6IcBP|YOm@K^TVPQg8mhQ`LVk#O~DP<&&A%p1j3 zcyIsl+ff}{`Q%Ia{`UA5jkwhsE8KK_kKemvv>!X-_x}FLh80y*n#YrTj8c^%1x*_# zelF4%%^?Qe-_FG6xfB2%;!j4C!#Rm@uO9w>6zanJip-NhjS#mIo0x+Ppvrmk%YXRu zZzthB-x#wX37AboLclQup1gN}`pAmQc2pb`(bPv!b?xZmwgw%oIq>G!wL_A%T~ty4QotK*Bp z#CI`J%(09|SbslGNp}>;uu=>a5+w(`X}16QgV#P&#;t%GU;Fv<0eoub?=N@lrg>vg ztGF^FryuMr)g48Y(*%-^x=k7A3CUm(V3 zJ}7t$wK=)T7NP~nO-N&8iAR>S1V4%fFe_UcQQ-_#(kpociuThaI8#USRV?yAE&*sD zi(>FaC`gF_!qcnu*`y|kAF8XXYbt;WKf~Ri;PI=GCBS4@x<;Z8$g`;R2A$;N9XJQ_ zBYm%to5(-Bt|C4KV+<66-t! 
z76nblHdv=tf6Fobg{DoI@kH8mQJYehZBVIvnaA?Xwp?-JZ2NshZjPj04*5D$ETerT z>1V;GKK{cFVDiX5lPK*~PW7VP6SBo!&+pA%n=YTqOOSNyEVi_w;uKYyo0rIV%TOXf zU%)=%l)9bzPnakN1S;>9+Vnxe3q;blZ6Mj1qDU8|VIHJWBlRGC*+%dI=+OmbrGX6i z?W*_sPpB+#4R$qd3ls?A*;Xq6>`O599UUFuR>-)Q{1pVjkE^)&MF=l4n=2-^cs~7N zdS#m}vv8xr0q`|{^~&R_ZE>28_dxAl^%zR^<|4T+k7@*x?_MBo&4XhQpDyx zz{(1T;o86wiJHZpxF{F%QHbCIPh=6au?&F9Iebw=p-P|P1ycqBhL-j z`2b>tQX9DN!P~N&eg`_w8*OT3gi=75ggPsPm&uLV|)S)<-5T*ow4# z%ealzaW6M?!~v*jCZ+2ZpWYXIQZG!e;N!*Ik30rN{KnOlu$*-Alr+rw5My1M!npA$ zUKh7l`*r(TxKHWOz_2By{TXuDDvv~To4i`xz<-JR@=@a7By3vN3Ka@C@`}3D8x%CjP7DSdC z_hG%iYITDpwLE$`yImV)U@+Xd$mjr zbOeu+@1lWbRz$wqPF{-)>nI*4pG=Jsvb;lOW96Q|zTfe4=4YyCu?UKeS(U5D{H2vy zp%hvENh=diPIJ%HFl|P>`E!H{gxAY=BO@auY&!glBb^Cp;YXC_-1XUcM6x@3>dDp? z9nPkm->Z}J;3?EFF*TCkVK3$jP1x0Jj1_!^5(o%W?;p&|MvfiUL{FhlZqz=amuQok ze(CyfF4{esM|%c-flbD{(oIgMeO4NL2Ru6Ej-+Gk?6!c8-G*5J+%eia7hydg6 zUHa$siQdD~E5%xw5=pKfZHH?$Cwr3~UJFNJMoCyT-enl^K7$bm9F9Qh0ve~jtDx2+keH#Iq8Zv)-21VXanI$1B@4EWne_pWZ_3ft z!>i+4mJXpmfP11Bqfk%+|0g2iV(tgqYZkWQRDT7!g~_J02E0fzl4UwUahk5{P1VO;<;@HWul$xz-k*iRD2c2R( z%+cJ~2w<=W3X1!nKksMe$q*6tV(^KDGA9?f5!l1PAHpB}0{T57rNXOv<(I!-aaViB z{qxwPyP;6;-bdHh+o0f%HuiP{ZJ*#ap3q}=DbisoSKYvaT%nn6R`-3RHQw^F!}ypp zNibh4vykXR#8oo;-yUE}U0R%x8cYeFSsw`x4Zmm_%u&^(RvXYKWv0@z>u{xZrp6xR zCDR@3<(q50H2(65FPAk@HZ!dbVq>zqN!mZ4afCe{PohRJOst56LcB}rn5m-`^L{E=x^C=8Itd_Q~x5oAw5&^=8~3c3nSluayri>D?ksX>?0dYgu&X zyR+wMU69Xd0Zd?V(oS3nFLgYLb{8qLZog9Y)I&1yy2L4A94(4O!@)?P) zJTJ;C7U`>xZUq6SP#Zu%_V*}(VTbhuNro|aU+9wN&6pTcm3v?`VRN~rsy47`*0mJh z7}W8}1>|qGFogI1j0|`n`xC%c%({JhM0dobq+Ec2{%4GwyL)-7r})$NX=mPx+9j&R znBjS}Cx7`17{Nv-9oRWutP5cg!jx{#gE_F5&>ev!-gDqJQ$T{2ivx$2PUW3#pA}uV zK0|l_;l)AXyX5PGryj2{pd9>|*p_B0D4S$Ep9-*}@c>9kNm8TR-+-((5dkC7g$#(i z*SuuU97hgRu7aLXX+!~u59C!_@pp%|Gr1b*p}+Y47S!py*ibDDbHea52>ma1KOCtK zO*?!>ba`W$=pu614_Jpb$KA5fvIWd}voXg<;~MX@wtp%!StNKG0HTAfjIM#aJPWK> z`Os@Nn8qzYd*-I5F*rN zxX(cy(HRLNHsFl64f4*Y6!JP^lu;JSl@%n4l|55X3EWumqmgS!!VS1OxSd3OoZs-~ zS9TVj<2e2|sCz+(WwtxIExpsV^czR@3ZJ|^q#{kd0U^mmh4bYe{T~ovyl7;!c7?Kl 
zu?JZgd-OEKgX@Uev+j$Nrs6Noh=|Z_Rk;W>hotLlyqE(t48HSf`ZauarQY^PzAxdm zFCnP07v;D;mB)@@_@+}s@L_!DPxB?a51|mAA&6&JF91rRluFUPYb4*l1_AtwWG@{N zAB_tdblqGXZ-L2#=$&@{@r|JoXR|yPog*>M3rXKkTg7;1C~u89v9pgX-3~?UoBU~* z__X4{+e9IrjoG9WkJ{<^fQALBH?6Zy8_s1@6QH5|K%{`IXXG}5MBFRhTH9y{kUJc2 zSi#VA2N_&L*et~g@#oG4D6abGbteU=F5adrte1Why4W3k>k12oPr9D(rfX-KDqB*? z>9N8let&;|Aus#KA)%qktd0QJ0ISWw5)IV}*-U}@)QrrrHiBur3Go8#NOTYC0IG=( za=;dNifG*G7q~)?-$gDyd|Zm&kf(_9R<`Jz47;yajq5#%%bOc;F{x||TCBd&fw#|e z`JVU5YV^Fu1&lVv(?;M23tc_ZQU<(pmzfHpT3A@vx{|YMZC#z+p_iLIl9xHT;|Yyw zelxSz4F)r5eu&BD#q0VLr`|i(n;mwWc4?B#51>k3Yo@jlC7s6Iy;YY2bV4__^rv=X zgaK_nAJqvi5%NYi`iw97FD;9(H7C_u)=`#4jEB8OT_{2(m3C^>(*D7Oc=lPJoDlr* z6?7psNz`?+_C#2|xx4~LGd3{<;wBwm_&0wx5U#UZcfGNqmQW?@}^YAf5x)# z;g=WmGNOVCgyBW1B?vl->yMwfQt6=xtSWTw$!bq-=DCf;zak#^WYnO_M#7YV-L_nN zadUJN0+cpaS(&V><^@bkYpe9dYL*8`?V3Zx(W*5CyG-!`jm5+1$!M+PcA4@D#a+}5 z{Iwnlgp+;g@(;y<`If5GkCmo1)F|Yn$ELPnm5j~2c7sCK7qxd6(8|&LWB@S`CP52_ zoHzjZJ89Q7%gzhY=kSKel71yMdSVKv^A!V&Dz`2)Fb z8q#VvFoCVJ3-6XA{O2RqIyEP=zhw?z)+TCZ%^c-3c5Stpw5;iI+2`_Eq)o2IT@5*L zS-Z_a$)rtjQ7U!H9l4T}J}llg(Y)m|Dz_cQ?momM#7x;%^>`>n^qH#`3s?vDcqjB~ z)OkFTAW}=(4*ewMcC;bNLzh$1s-^U4PBCc%IJv5CLR1@gMC!$oU^kT%? 
z^c#z|4V>mA^}N3H5~2M|dP#?blx?(I-qawzq+~0x^Yd~sYEd$H{nx38dw&tnp?^S& z?p}2;EYmswq66g%o|b_cpBl?o_s(ry*2tU#^9Sww!c|z}RiwQ;6FeN#M1AzKA{rY3 zx5Y{CeK9!d4D=;H=PR9C>?dx+vqVj?SG3W-T%pL+xf#D7Y*G&QjLh3lXp-Tv=m0qRe%lL;QI)dPL#ehRE zh+RTSSey7h=-b^Jrtf9gRpD=f)pc!&JaujhEF)kLfS{!sBY3B10N^+%06?V@K(z)g zA;$c47#^-+$=HD{PMA`kw_jz>KCXuJQC>c%vLwk#UZve_!}`tV)H%qp7^`|}E40bT z$JEvY&Q{*2#hPcE-_A;3Vlb99z{GJ|Js+=QN;FfJ6sD~ZRvI}atI%sUVw4}D_~Q!5 z5`|^_(8lrwqW9!@Zerr7DY~njrPj3$w^|m58)nQFvNs8*c z*9puNYuhT(U2UzA`aM6fM_DpK$4xqJ43iU?-o>5B&FCS27QF>6*GDd=q^3fFMi-Le zGmn*A%D9B~8S}rSbsfg90zxp|FaUKgkkYTt8FFk*dcm}2 zj_&|Q!I5N{fL_V#OYE-2i}v*j_OFfsB-QMw=DnjHIB$5P(PLB&d>dSWkZ_`UlypGE zP1SF7JZ-kMxpD06`TI@4iGijs2bU3HTJxp0uIVj1muffs!)nUAJjqhsO7nFj^%Xu@Lds=f zewExbye=IdK+l_@J8K-Ws|4+mv`>z>Q&xYt^p%oO#No=NK0oJyEwx25X8GX!X5w&M z7ctB0-n->@oK@Vyb8lxlj74uEgNq0}^Y}T^R?@yxiP^Z7&FHK}jkXcPE#i!4!&1kN z>ZZ!zjmcA0SeI^HzT=%z>`^r74^Q=Xsb_O3cnL3W`4UcYj+`9)Zu;fL9ws)Ct`5TL z&q+p3&xhiJ-yed&4yt##PN1(^6=t^u|94P1oAhW-RXdI-SGqfax}# zI&lf@-7%v#6+#es@N3IQ9j9$KJu|hoWbw9JcCac&n$(?`$0GAGmaIs?`Oe*KoA*UhZ-?pO44H z#G5?suuw8vs@O$o%Fzvp~1uz6)}xW#eD32h`;=HejLfDKM87yHZe zir$^z0d0AC`H&>V%U@7am!=Y`Jav)8gvsN$N`6JVt=sCWX^GfaclYn?ZsSk32XI{v z(`_7xAFs*@r%3FT;X1weYUf*IoYR0{OJ(oYGP$M>e~xAP6x*+Gc%U;AKsz(>j~{`yWN z9^}MTuR{jZHL!Do%c2WB9Wg2(TsvOq3L38Njc~i zsL`qrR`4dD+=e@KH<8?&vzw_!>PS3N(h40pv02>h;jT=1CF3gPHxuezb^ybth{6l> zIOu4$DLP3d^qzKl1(`Ix!+^Mvnn(L0^n?vH1&IHpOoa=@8{|!|R>$|E1Fqi|WOtlc z%u}QDb-o^N0NM^X?)E#~us(WrZJA;R+x4O=#t^8neHYM0D6hq*neqe5606mb>E>!O zT8vX)zIzQIJlvFpmqhfPyYBS2*VFkw;VYUYf4;hZ0b>YdmvaLcphmrhbbuYHH)A~b z=T)1Ze}M&X0W=@TXYPQhu+2g{?-$BMNs42sV;78jnYO40@6HF{N2w3)9DYq+h{tw1 zpw6o|G5_xo88m5#^?dlo&fKSYIKla|C~3OGfeZa~U_YN1F^{vLsn>NyJp_O3Ji0oM zoEYal+t0C@bl&sU6hTri=Kavy*x_)ajiJHYBRya!ezP}T{UXp6W8i1V(M)fBvFE^a zQnQ-9*4H35(L8`!ATD~${a^w8GZTEQan)y5pz%Zas~Z7mKPZQ^h+EX8EwCqfgFYk0 zaaiV}fySjw)D&%Hj)T%7L#uZ7P(DoT5A(30ZpKdp>Q7eL{k1$|SH&$#-I_PrJEGHl z3#xVr#|Z%Hk7D=JM;r)Rvs_(p7K^C*wO5GG<4a==YP_q00>O)iS}nKR7mYvIre2Wf 
zuYJvDG!yr2d8$k%hiaI$Vexvrlp`^m)wNN0s4@4_9Evx1<5AIamtB=9D{Z}WL*9<^ zgs3_!9Z#;>#6LXr`RCWm8kcEk0R<0!TQcpz z{31IB6{lbU#=~UGozF}fp^or(6zM!G|7&D9JJ|jLtSfp)n%86Z_aJ=6RG)bh6vVHY z7O>i+k&3KZ?0NW-QAr%u|D{k=b_VNA+;lDu@1e2F7-NuCAdt%DvDNtlQ4qOYe6m`9 zbzC4AQX*fP2hG`c@J8j%O?h4hYeY2hNOI*eeU+uN6soDy0%b8Jb8?~{$D3G>L;NlU@4 zCV?6PcbXp^e5Zr4B5~+lA~X-CpQpHbg1~#2^}5AmSLWEMSIMG-57UNTD)1_M(CG+G zKqkj7qu*r>UGu1H(e*G+`bC>3$6+gNdpu~!<;4D44qr{=v-(8*U>AU*l*Zh1HGyJU$9Z|8$4A}qc~tE-mj#*mDL1vCTtX11Of^`=`J+`f zz*^k$6b#2%@4=!Mo|uT#KQR3H`7R(VEjv>hfNoIHl>RDQYwD1$Q5> zwW=8)W%l)>DZNSL3Wq`>)e9bB`|O|4yA4tJ^XKCdU>{Z0`!WpvfRA;S% zr@JLaC>`J_$OrbTqxh?#@F#wdqbD%spT<+x0H{lu1g*S1Gg6w;L=FvYSyGwpSDjp6 z8Zg$}*sR!(;d4VxMbB9#+KsJxuP-$^C+>KvuyMP6i(bt>A6)`1s%9UuoCFRhx>Tbs zHX5NUP^A@|&>#eTPdMv*CwHJY+*QGM6X~84#b#GI2Mz_NM9+ym$a2`)PADgi?E8l9 zU!gm7P-XHk-2pb0SD_aC%4jL4sI_w-yUzVZVw&v~Fb@tFUl}P0OsGr}mH9EZ^P@yp z!3Hu4PRw=>`-_WK&|2VVWWZOIe8RHMV!n~&7NW(Yq!K-;GFH_MTr)j` zxBB1ozAmW$wpx76LhZ%*_Gel*HXIWK622UTHYHsDa*y{&0XL`j!rU!ty@|S3MQ*Ke zUS*b534SKqtH#)FU*O5fM4DB!#9|kolCwB5tiE=UlIw-#LvJoVXIy-+I<@O?zh_gf zpFC-w)Ycb2&Z<;YBv?C97bCOK+W9(FuZ1#wC}l-aiGz-o^D`bZ6+^Xg9Ig|gew@Pj z&KpdkJ)2XzTB)4wBlp=;X3A*;l`=J>J&nT18fs{rm-X#%_t8e}Y^nM8ZY8lLGd*!` z6QOnS4IVSJsX)E%cy+t6 z@eLI-7+nGHE#Jda1sb$XMr8SFl!x`!VgYxe`!72mm(8qtx9PXo#%SGy3vUI-YhFC{ z-hZi{5?$^PkWa``3KEN$qgAN~FGX1mLh&szl#%q^5h4SaP%LW$PJvk~LBfUZNC7A6 zf-4xvypQoN=0ZLT^*)3%L{>OW=|E>ObXJ0Gdx>`5qT}P8f&3k+@2?I4!K|SSq7w}a zLlDYdGqE{Ow4Vh#oZGFNuniU1t5H)aJ|D*iysO7$X0y#ll9Tp`|7rlE@Z3o;GgP*W z3;$1X2`3_XfX!##u+&SSmQtAef)FK%={_!>?pm7?%XSg1H;F03P);@)w_X~gzk4$2 zGm2J!{;(Y}rts-h>*Cze1SW*04pL>^Ypjwh+hp+ppEcUXxY|2=(<$*Uex%>U-tQMB z!W-W)?aLPB9#x%ei>z$Wjzg37xrMLEZD5SP$6U`Z{ZScv@qk;2>JB45I}M*|8$-_l z|NP)y|2meT+&|psDs>TT;{o4-Os$8_1Pm}svmO_{-{PyM`VY%2l5ian(wSPPP?v(R z(kt!fSqwej3f=hlI8|vltyyir>8kg5;={sg*KAv&$ZW^3r8dOTUTxIz3a{QUVT6cn z)6dw#UeTCJ>y4fWKD>)W6~cMi2rbMdB8H6#z=x(9N%h5OUBAd0QmN>)eb3*VaMCVM z#KNCSIOhivW0rqCl_=_}5NoI``&iTe{yj$xc3JHN(VshIY!Ru}*dEtm467{L&uD!% 
zo0MBi>H76vv1+B~#Kv2pOq;uRu?NOfk+`U8@2RbtvVp9zMA(ZL6IhI^A5C?M`BQPe z!L>qXZHau^S^Z<~a+x_fIj;_|G|tEebM%91l`Q8sS@1641;6DVaz{s&npUejmy#dt z7}3#}xY0r*+@SOxsA7tCH;(&NRp{SlW4GQXag}S!JS_6zh%@L^P_~4NzN7bHY&D*u z`JWLID)X#xxF*iI@>`I3KiEwTk!o6cJr<~; zboO3jHzrr?Gdlc_2t>(s1j%dBg*cOwux8gW@D7k>57e#z@Tw(M` z8o5nX?C~XyV2+c^4d=JoVp8cw0*C+K& zFT?evh3QZ3Z^QdBm?ST4by@^vrzxp3U0a-1$&0L9sFmQaA*p>daQ;Rfm$Y2t+KS|a}6F38X5`Zn+aD_ z?zk9i(J^vgeec4ribG?Y`Q}?z-FC#N>b87z`@|~iPcSD{Ku5tOuq$AepnwKJKZtGv zn0F!+2Pk?Yz^Oawdr{e}Xe~Kn)sy+AT%{pte3fbJ3O_l^fS-9(sw#4Mb>G0FQ`P=5 z3er@2@Lofil$M^Zd_MTL_LtD^BFFnmndY&i!M$UzHDpoHs4ysV$WU-(De`G`>N}H&^SItXO1!BIp8o`y+L7;X%X#0FY-7#$|99Sy@B!y#ws!g)?#&mCP&NR!R zm83nls7>M?HTn}9QruhVS5E*JAA!%tuQ_*9PVNc8L%!r@bXW44uJF>zZ1bYcYm+6N z9(($vu1KxYd5k1DwBYUO$Vfd5-sQOQwUORi@fj2GRjy9K-oEJRpDma6sK~3$a2xk@ zOKT^0_H6~Qw%Z%Hf7(~Us@i-`X}G~-YO{2sFpko`Qn@k1C`CIwBVm!E8>eM6KbF#* z5^%caOTg#5-<<7FA3DKqAC>3NN%c_C7~4^kck9#H?j&7)73ApstT-yw+&hBz0Y>}I zR3DifHApK@FpmBxa~e%#D`{R1{n55LTWT=HUeL76{XDNv{BoxYGFYtc9@78zz7YI< z%fx|wiKW?6Qo|e4CY~9h{X%Cts3es(ws1*`I@Po2yU<|!9{eZD`AC-SdVDu8dUAv7 z2U=k(DsoFH$8KDkGES|ew9WVEP0&ISfjV;QoIiQkA zNOYKsG(I@c?x{0_7JAd*at#xB(ah+nHD6#Gf>Mc|dWc+mzEz{sAkS50Tx9C;80Y=^ z+{5Bf3+p-SgBN8RZ7=QBK{78Y>FhE@&)uoY@=CCqXt}X)y;oNbseTWm{UmD-EvwD0 z?umQt%zVFum7^KM4(W5-NoXJZ;&@Hce)m}S1WFrkD+hV`<@pzc#5X9mDs1$Nyzh;G z2G^>MrU+O2$S-V-Lh!$Q3FUVpv6Z82Gu4!QIeFEPud9p4eq27j!K!F9Ke9CKzd7Ic z*@@WFY%3D@zTWOrt+~-tWmN?vM#=5TbylA%2;Jnbq>kh1<{YyWu6Ly;1H+6>u3kQc z@=)<5B$lB(K&zwkcjxNK1qVPfndym;Vmqd>f%64f^wdbt|CBA+ zHt(N`h-&%K@L91xN#aS@icigaEv_t~-skffG`}pe|5Hc3SWsS zcbf&!tSVTa3$qH6)!#>AFpfmscGOrJReLXjmc@>-p;I+qFdl?E8i_g~fzvl@HXaxq z_f|Y5R&Sduiwwnwyda!ILtiNfk@B}aakckOgd}))>6ShXd+~x$z8k~kbMdl=CnIM~ z?L!bR|G<>|Px11#o0hGBUeV}}Xk$(xH#3~~meH@>XP?rCV5EnOHSkroh@s23z~kv{ z-8v(^%JC=cd}Wb*&}aKUwc&4)JkNA&)v|gg@RknICn%!aU!{4DYrmoihgEJrzv0K^ zpCKW*a=G!;so}mK`<3R9rhdP)#`oz09SIM`CpyYaYZa;=ou)Hsu5_3fH z2Bzi^=f_f(NnMi{3_|cmdAV$ZJ;@V=&$ur~ zL*e0+2soDm_&jL;EXF)14$SfBp4H;`O0^ 
zB=>oRG+}EIMHCxz_q{t|@VpBCIYYq2xNIMqffnhyRTMRt0B5}b^J@f&3RJl0zQ_IL zCAK)n5JL5we9ewszdROk7Ti@Hc9Z=cR7004_chWeu*J3xq4ee@C?#P$!4%>EUHVf% zVBbYM9s`A_4ZLG?yEX3fui;_qp332MDg}<@i0khcD{-1~3&X<^0j@yhj4|oSW>H}l zm5_A-r@|v5p3n_Ch%KY1KbXPgizu^C1|JL=*G8k+rKKA^%9p9)pr(5rMU==U8jr#G#*%n)m8?qi{^;^eVNScMA zRowAGSM|m%2~w9ONr7|Y9L8%r0j*u~9(-iQ^{fNH3Rc_n6{3RaXDdOwf?hB*Y%l78 zv|i2{!e9p-Jx{cfm89yIjVdl7wPh+{K^g5j_> znPBjr)q+Jp(xmnv^>xlwkja3;`~*ri@QfWMSSx=xMreiJc>$g8Fu=|4iPf9szL3AU zW6{W5Lz{QGu887Ua2*Z*50?@9)rU91%?pNvJZNb9d72EJ!QRm7f=h6!zG|g^B?JMX zSHI*f1%~swI#Ayce{@+ zV4Lc`fB$JP0%wQ>0%7v_$2+LdbLe(1P(Wa;GS$ujlZ1oABA`oCf$f2fjZI4|8YF>@ zU4LBo*`MH2Y9pqLqBU@Ha&d9Rh(37*4U}Nw*Fe$0n9FtH0{!5Oj-Hyv=OcCJ<+5Lf%=>w-&H?&Vb7kw6B=2MSCp%2b&2K> zoijRk{3`C@J5j+P!h8b4`t&k|(CC)3AYoJBv7E;tQ$b)- zCfY-n6YyY5s$gvE_Z;t$4|8D;Q16S2aQmE}a7ki$tktNv+#|^tZ9~j`+>sxBiM}}S zx5CSVukAZc?4M3+&LXbhZBPAJZ{>4QF0EyOP7S3)m-*rKe)!r4U%rRf34CJfzNL^O zrU^zh2eZAl9XBX3}Yxo~v z?nk1H1@3EI6chv)0Jl+on=wfkQR-uqKJwu?vfB%i!j!LCcsE`oK450{F85to|vYjJ@%u^{1A(`1L z`y}%?SuJ~Ik8?QLd+&Mv&llZ$yFdN!>vezk^}AQcIp6ac&+~rXL*I5~sL4C~u3krU zKZY@SM9*={_SHo~H)7CBgu4n=iXw1qr?9$JF?iLw32nz&@C}u1KZ+Oo8j-2C_AF$!SsXbWl!+=UA0Ipo?;W{Un*Bm^ z&6XRm@earQmZ~>5!z!5gaGG>~4?Y7VsLZP+^{TNMP}eEQaeF(2aN7!O75qiEni7?~ zp$c< z-$NHykKkD&p7iB0QPa&)cQo+E1Rsq(_z0Er|0yk@Ox(&8J*=B!AUiap?bEuM;Z(`R zN8X1(hl?EKJcq`GX5|9-Y&$deF7e$?{}O$8P;DoT)+*`j_v#j&P>1oCQulqAmYT+@ zy|W@zu3WixYpuq6DXG_(C`~|fQjFDqqb2kkzYi54#rvZW z)xZ4QLo>%lc03{Oqr$`o7XnvSkjmrrohIIX3*Y+PmvFgy&g^>WtxRV&dRfXd#q5^H zmEXXd1C=xF8^V}pY50_xE>lv>!qATYjPgwOke6K9y1p;mC0RvJYhR{ptr%&{ z9$hpQYgvh;^a}M{b>I2Dpk6CrB@JLhq6BSPgM(j^*UI+>VH%1N19ippjB<_CnAzlS z-dyu6;a^>#d_T1fwg@{AE?2*8%UAMkz5$`bjn{+Y<|;2;Th<0$^siO(K>Jd@6_DD# z92zAQZ~RmWY9o15$Pe2|&>S5Z%?xIQ-vN8m`sL`)G{HF? 
z?~m<5&TeNHVT?nM0LTL2Ozh%|K8Y*Ff?Wf=&Vd7}Vs*sL40u`rbg9;;Qx2NV^7!g1 zCWtZY(39IDxg%`6O+bm1-sIgbW#QKsl0>&J{sRgLxdKH-J$>7sryR{Ysp zlyfMc+4|qzJQ>%b^GRey)Esl(c_5gli%&7ja*%HAvEI`)%`_vYqNBF4@f&ka!eP}J z=j{Fk8pHp69XJ(aTC0S$w@C3#IFvsb%^Us5!V>jR)u3s|+F9=I3S{Wl(&2p{FIf_O z_KD1MT@cQW6OZm?KunkTcVqX+$s?x$nsdAGy<_@5pZPJt~( z+Dnxl+72rz4`|iT?AcxL>sG7&VyJ31kxlQsP?Ty-tQcw9_&G)om1%#x+qre0D4*jL zBl;%Id78-btrw+#?fsbyNAa&D^7vBud|55rCcD-D_q1N}&KbN7oB5uz374YENw}?z ze-x)!2+f8w{kqTuSLQPIGq-5QRBG2mlvNS9EMI+U-e{4pFHY~!5JGO%XBykOTX^H+ z`D#?FOPnC~?a`q_w2q&q7rHAoZJDEo*#^$`&#!He-;cF9DgFKYK96uy9Hmbi74|TE!R&9Bv zg!@Re)!42>6iVZg8-71z#u&rorhub3Znsn+49^+UPTNY&EY=H7qMZlVcRiy%-OYSn ziRUL>nK1WaWg81TmxW`F+Ah{UTmuzj3MxMe> zW&8V)s20%WR`W3XIGj=g(}Da8M&$Z{^5cdWDTI$d6F*W%UJS z(CkoilvUWa2Q8Jw*Yf*giaz8mg!J`k!v_U3spJs3P{(ljpra+j(EJSs4-SV*G>ka| zN$)KI88L_>t^!G_ks&Ze1r^Irm@Wa>Ol7*q*G2o&5+l;3USW`o-b<5Saee!~>ok`x z&gU(*#obI<0w&%GZlL%9wE0gU$d46s#;SLLYV(AHs)f0SyZ1_p6NtNzjE(z!+Z_ zhYo?}Kf<(;1e&1H=|+c#mxQ+OYcpZn_3b$`H1=%BkP>L;9UUD=FA@kI!ex~(H;oOo zm+=E1HIVyZpcD4KF1QPxaUAEKSFZZjW)tanS*n_>^(z9oW#-B;SE@gLA10>pIrq+J zuV_D$#IWJ97SGMU)y^-TNDS7FDSxkDN>q)n5x1GV{|uT7beX^CFbr^cM+>M~D^-$$ z?Rvm$^)w7iM7n)$r2C;FGaMKCebX&M><79Jv+3MO>8R(kDrGSYeV`lRtOUK5DPHW^?iPRGLK$IzcXtnUmtQSm>gszL&t|!2yFL9_Rp--duV;XLr#4u4 z@wONSTeuphz9`hxL8;}igHd4;jGv=b;uFh!YhQiPOu2j5E&kSrV9L<<=~nM-_4d$S zP8aeAn^G`R-jnFUXL-EzUiX=pVckz0&Sf60J^`DfXsT`Xg>;21)=Mr|vF9OPoiRzI zz=Yg#=)7o7qqyPD`CdF6l9Ap{yH7@zt3rI!*>NLV5xhz~myMN7ymD;d{MM%jK#`Uj zO@pVn6OFR+OTxFu1TF46#v}_hwoJ-k+0K6;L^ghHr(x@ZF%?MPNmb`_*XcO86Pxgk zTm)zc+05YS!kaDu9%YqaSqLhpaGl&wN=kX#g^Y<=)j;=%rX2=Hg*lkG2lMOgpcqB8 znfBeF2*C-|MS|K8fRL<#ghUbYk`=a((+q4IL*UI#M0;Pi@A8tGYD#bESQt!JH=YAJ zbMvix6`;v#V^#k^gnyU>nkgmg2)n0c4r4&-fe^S>EkVKNBNe{AZ(s=uzZRS{dO{sW z0ML3lX)qLiAP+1-W&Al1Q)JDykOGkY*_AN)I|A6@XWI*GcD3wDcz3X@wO6e{0Vc5? 
zbA%}m`)DN?OSJTR^ZPBgk6`SknQ`V%U zXKXmLUOk4*kr$eSb4;6EpLS{QSpKxzeMPNkvQ*VPQ7Ot^Q!`M2tkDh>4puerl}rKE zTJ60(tHMnXzJTSmQf{Q6aqA~*Sh`@(+^**-9FsBRXjnz5Sq82mwMIZ_mss;^7za{T zGAIGzFEIZF$5haMCF$g9j*P=IG_C0|GG!OSsnZJEr>E6mEyjLcsV@hBbQ*|E^4{#! z9Jg+Q$p^p&;2sX3VF&noASP|AeldV_fORLhQUnJFgIK1l7g#h{UNquKp_Dxaj@Iv8 zdVy}A)OuXZC=dP{f*g{^Ju%U-v4j&S_xql7!s$W`xCr;slk>X_j-NMf#tSb{e6X%^ zfYzm!b6x+ah=$`Q|}LGt)f-`iw>o8^kkv z=ORmZdAVG@;adWHIal<3!Hu?(D}hcJq=EAw(IUdBs;+JA>x2&h5fy?py~x%|T@Kq) zT3kE_TEYw=44|epmsX8vTAm@6#U<>b+f-EE5Wb3AXh2=Dl47P!y28+DGj>Dz@p_~D zTKWnUI#5677nO6a_(C@VzF+_L7KKjvfYP|nt*PUdlGkiTx?Mk@*SZ?!3&IXxO68&B zI)83+l#7CWCgxWa|GUNJozOZbJPb;7J3Tb>J&p!d3TyA)`8Rk_ZFKHdMt{qoIrQip zEN$CeR#AiBO(T##pn*AhrOxmtU=^yS3)56gcC%mZI-Wak`Rn{RZL|2{F@t1sH`-hg*qnu$a%KE$e&hGy+b@3M5AKpZn@OjN0Tb#M zz>^27lQrFi-~^9QQ9Ox@vERM!Knd#%+5w|uUWB_Y#J3$%d|bwbIcG>t`Nmp;Ob^%Q zEqmN;*Z_%0&-i}sW;3>tQ~DhYOwRxDRS@9!-Ht3vt=lRL3(kMNkbnPZwXko60`aFG zZT9?u{wI+XF%NNKmsh7BzkjehbcYq(8o#e}&SRt$fPiwnGiJw zXEpY`vQ@aPthk2r{N`~5t?afRujC1NNsYxq4oU+eWSvN(7a$KrKpyzgBZW;{8+^<) z)3s9#_Us6IBwhXGdDxcSG+hlU3hPF3SmRW%>)OH2V5wyW$XO)US>d8Jh=q5yiKNF! z2ExSA$VY1{NGd8tw&>1G(q(Bvn9%J$^C}VY-Mi4zqWP|rVz2TD3k!2_ph2Jr=D{iw z)4j4yk)>DzR^~e0cl}XHyCSKqz#a(E&$EN)ik{#Wi@n`y<~kCgk_^RPKPlpU!{VG@ zd3rV)4L&aWd6A8Yg+-bnN+vGc%VJ}TJAoB1A|<_We+D@(n?ppxprcXt7rb#9fj6qX z4IFiF?U_?Uip5x&LO_)>(XE=@QAS|!rlO((&V!&vLp)Ety3|x1fMEj?g+pT{mJ+?g zAmWwjj-h*aDRgMatTx0G3?KK14DLr*Wxf5@;Ia?yPK^NFY6u=Ef0R`=48C|70YF2< zZpXv5qUrZ|>yGV?8onZ$g}{#hE4rIiX4R3;c8k;=+1b@#j04Cg2H?ZMdU|^hNk#$g z_R8d>^X_7%=guaU`F{gP!ZhwaPOqgOSAW4jlRW2rKKE#0ogLQ9em|WRIZbCx9^S#Gv zfOP_NS)dLC8J-Z61wdO%U<_E8-QPe%e+(Q*Nmff6eqmU5fbsFhS|M>UH+nhRP0jz) zYv43fPK`JB>IUO)ORUZlqaPlZJCFF$9z1fyw(FVVVXqniRd2)rOJ0Kc8ea9?hl4rk zYN5^WT`Vd1eQ!uqSKu>>v9e-Pq52AxmXwpTHvU?wy@(&ri4eEXTet5$Pb(E(U{1_R z`A(4Q?E%V_v}IzxCFh8Cms%97sp`B@%)V3A2=hu#?)AIMih7d)_+a3Q*KHQVvU5X! 
zJ5z>)%p=J!mwVkfr4%~QC6A*{$y60e?_Qm}HZY0cThoUN)Y}3wj1F5KKOgVllwaEe zpe@iZ9+d;z5OhXht;kc0d>&uT!FT=r{cr1G&eDakg!QR%H|G8Ocm3k8k6sWf_`oX4 z?Zh);$+j;TJQk9ad0?i1A+N-Ao=+HET8@7Eo*FAUZUIj*PF*s%p-NIWdo~j2Uhb7A(FcpXS6iny_cg6JYhg}{Ievw}4YMzY6qr?O` zb&AGhUHAY;bDxI_JMbCT+yl}|3=2O$8(SjGN9?(sN56Z*aWT*XE7C4k$57e#C8O?P z&o&o7ZLTQ(e&^Y9J2u=pEpDu*Ohz8TiA2DB>`h@N=Lzy@{CyU%v)DWso7Xp*64iOg z^1xPhhJl?OJA7*CgFp34z0){L=6BdnU4HO}frJM_=m2g*k9Q)0u-&;S&f@OEd^N!o zDJZOZXnJUvFY~1n(=nt3JSy^Q!hwI|KTbLC>FFDSQL4gI-@Zx2Lj$&*r zdUIEU9AOKd7i=;>sw{OJ#>j^+AE&Bu2I>sd4|Ax`adP+W{eoU8MNB(w>P+j*0D&b| zaNIs2H4V6{gs+ybCu}^g>UjG~jyaf5v1X`QS+0QY=+=g z{R3vV(VuJ}1OUWBM#QXf*m$bD-WeXcJZ71w?w1MyRZHE@Ijsu>+xE$qT@ zepPT-XeUYb5QWzaC-T4<@xF>s)^ota5@gok!WMMHGjEfG0ha2I)in-8bE>R+B~-90 z!Sfb;ySMM7I~U=rYNE`P;fjW!4Ax{|<$qYHveUf$#D@SLqlxH-#Cfa_yzBa8T?|}? z;00Sr1T~6D%;e(@{cdHO{p@awi)o6rI4`kd0BO=XLN9Dw*x-|EB>ndW{V{VR$Ibf= zJ&OIp9yOc6Wm#o|>q`7VVEL=oKmPEZ8aSe0+RO?rcxj!oRes++%a#l_9dC&}PO9~u zU_Wcwaz;L|*t}xul*-M0w1Vm2o?7P5$uIwSrzG02A`nd0K-izWB&v6a^qi-#GK;E?tP3H-)L)m9b|sqlK{r1S z*0YrJ;f6avpna<~E7J&)+Xn}kuh(5H3s!v^(7>Q*ftV35(SP&s@MzGbwVO5g!y9E` zb*aHWRKCn}5mqsjd>XHh`$=B=g`36I^g`!J6AY}-WMVoyLi7P%SU$T7x_)G7q5^=0 zd(mRfLC@`!t9L1*yBz{$p`V%*!X?B7Q`d$Bv1uXj&#O(UTLV>+iK@Qnm8y}R1JOj8 z!z$mp$sq)=I;|K7E#5@7#ALA-11||KvcUe!S;g3-&z<9ahk@cbeDn!-3#Hw&Y(H5p zQ5eq~D)%j?n>8yl{4~swda~h?IX+&mAY`Kuyiyd%PGGsPRr1^0Qd{(%Rj3Gzona#bYk1?l4;&NWj~4^ZcGj{ zV1HNV@Z#001%O?^lT+~S_noy)FW~4O*flT!HX*RDReIp`S7)}U4cP~V;sRhOUaAC; zLX7_~n0C@7YotlmyH5U9{{t3Zf_icrvcX4+Nh+I4_@*gU=kc5#ZbT5MC*G_3YT`ll zb>z91gee!8q1gov$nu&T+d)Bd=-@$aklP_dXrpRilvr0DgaPYD?)b;0Tm2V{Qm1X} za!di3CDt}c)fJc{?FEa~p@{^n@_%RC`M z`$SF1$!;6y!=Y3Pgv(*CTHhBSzD~Lkp;aveA_h3z$z>2KO-)%0mNs5A*n zJMYykuuZ-euR11R86C=yG6Zf-mvl|o8&$^*LzOmhSh6{(wl{&Rc*XKH#^*rAtn}83 z;N^wH3;?=}V!(gVr&|!0X}KA1-2xLHNWlWG7uY&>HS`z(5Fbf%Rz!a^30PK)F-kVQ zo`v`H?k+s>h3SeRx5ydG)g^p?5eRKTG4M8sr^Qsc|CmxXd#s zu3_T0(GB)CiTFKz8K7eG=9HjRq1%j%c?&^bSWf*QmS}XlfK%a3?rhbwe0q@lKxO^+V zU*L{&oeZ4PZM`tITxNq$2^01|^o(C!nyC*pzp$N&R2vpDmnpsHv(}Ys89kvtD2ytb 
zX$8e&b0@|j<{Q~wE+mcdf>YH8mpt<0-DQtkfrv0tli{?(Q?4WS(UYxs-ufHs#?hvM zW?U%sLqf2%T_lFSk&ElH%^#bV3}B8e5|n)}IuzOk9xb4wT^$irX>*B6J4a zx=XGHq7IR+{|JkDSPJq+yoiWs2uZD>DueFJga9`iV*FO5j-+&*Rzx49qB29@1~sh* zk@98lBV^ua`-3isHt)9T*U~NMQFq|sL zijeJs`LA?eS3`>blXB){TzE5ncKdvX^RS(T{hpAFQ~{Q##zzr}dfvQT*UitT`DLN6 z%Q!92a~i35UhVBks}hJ^pyd@^nks9r_Es|^`uV&Yc0}XD`K_KXpF)xShO>bQ^O9T9 zH&HbiLJk}qa|RCka#H^|23aZyMha}igoIUpwls569(byoa?ip2qe`*n7QH^JfYTR_ z@kIe{C!{HY6aqCXHWUxe1k5jzUoT|@v#11NU zINje8SHQ8S2IdsZ%*>`>WxWsP6yO`M=B0b=*f9VVs}P&aNMg^(Ie+lEvxh$pMu?_C zZs#5_8aI86=&}I;;on}mi0@-#l7^nuR&QCvAOXy@p`1j*f&eGfENp6O0*hq{%ft{_ zPk9)$vBPcN<=`E9in6%z8>Gd# zNf`A_4wzfsgOcUxHT3jp%TPEE8w!V;{D26Mc-gS9h@)2#NQ_Ua3hk3>U40Ksy7sxpn1lT&Ex7MdF> znb!VV;UH!-jtX}25hlu3ZH+Xwx3>D`&7t$S*s$8q)Zg{}b;pT&I5URptV@$5 z2)6Xss>V@?OnsOA{Xz)lAA8NKF2!bO)Ky4UH8!c?v4s@;xBl%s4Mktl!Vt*eLeAfL zat5A^5(~I8Xklj|*-Xm%E%NLnDMlfEAXzL&Axwnz1@iEcAh-ZdUBmh~B|W&f0T?-P z#|_pC2F%Id5@2|Bc^#u}a~M_SU4qd-)2(}+MYXvOw$jsJXOV>11VV2I+(dB{0o6s=3eAIbxn`v|}3Y>%U4Q$R2(zOTddA@=Z zVZ4oD2(uT8Vk${c)9 ziSJd7R}(h)@+y5^VGfY zUdI$(UMt-T{!?1@q?0A|Y8CvXONN!fkR8U3`wD0y^x!sN-Q5ZSZjkTQ5Z%917``Sj z=!tEZ18=$%i%!XPvv+Wa)SiB4a@ACvoo7(OjXDG(yb`Jy>j8uD+1}KSIA6fAlWtXy z)ly$yF%i${SatK306#I~umtZ}0LQKaCWcH9C&8{94j)LAQy?H=Y8fFxfanFc;$bd@ z1ipcB3~HnjdH}DVzU*2vFnvnF7mKB>jD6e3gafQiGuJ)?nHh}f*fi^dGeRh2*#HX? 
zoT32QVbT6H1;Z#VILp@t1Xyr<*Py+mA7F0={^SP25NpO=d9f1)Tmj&Sj-;mXo77#) zwgR@f`}(>GgJMK?c_wgzFcqFZL#^cXCQg#m#NwC$z`0bckW_@N4UgBEBu-Y6g$*8u ziJMpgeeJsNFpqEqxPTaMqis&qb_Sy0pm~;bZWVb^H8+fb^mcMCjBKxw%TIhbnL4dS zJ6!IjgRR(?m+Tg-mbdh=+IynPe&V%VZ?MCMP(_>)cfN+{xV|4$;CKR<|!L6Ysjs}M&~d(757B`8vBF- z3soc_JG;DyG8Zwl7kYF#^H*T^acCTm^%qX%*N2K&B5`GLJb8fg~5+hyTt6A4+bsDw}tcVeQX>@Ax_j*U0NRx*H!38ApSbl}{J67uD z)h>qdf#q?!YNO~Q%iQFaE=h$z=MsH?Qx(rHPIaB>xf=?Y+Q3Ei7iECBflz0MENA_jfqS>wr)teVy-(dc2GcR> z55ycR1&^l<^Q2F2Md}3S_8#M1fo~O9r$DdUxWI->W=rom4XaLh{#kl18PzCVPK&Rj ztMor)9`9cGRW&F%yFKP*ZSQuwF+{3lHWfX|nKcGst#Aia!Is`JIwS$|qKVJtTbbrg zqpnz-sQ;VR_dg#we9k8qz$*05Qq-3_zpzAwy9+DO(QdO_yxZJT3S%wP)(G-XB{y|* z={DaGTd#)EcUSe+{2u>Pwgd6HCe_W(=_J+ZQ!IszTNr=}i|_;IC`Baj#K4@va1WK1 zE@g&JI`fv>JR-nTxatC0Bp}HB{z43=4-vOXZ0qRYO<^dM1>(FkG zOuu+5zw}X}la}OlrRUeGq*n7Bf1l~Kheelf1qHF%GQtZ5x(<1I2xIDN0tCQ>-Lr#C zyVp6vz|&bYDA%G6%5l3>LQm^xKSjUPxP$)sHG(H``_@0D5@NL&&8+-CBr0PUKHsTM zJp>fW@3BJ~pjKv?eDn`F1Ng9|^6I({txt`=v?;RIoW{Dh=9T?FUxz$_Yda-<+5_#J zSEt}Ljb5nyCc%0^csEV>-VY!!5aM31k_*4ji*LAznp416<>ZPQ3`8d~F0TzD5fR%( zMLCSy)1fX`Wr@vyT7`axL=z71pNZkTm0_nP$*1{?1GmP}WJoizPBIL}9VNZoXqbxd z3Ht?ZUD}0kq^`Hx>jHPAjGytoc>J&FWlJW92-AfSvcT^Bw0=TlB2K1$Kyd1iHMmS7 z6#D;;R_kY;_y*BA50xCyJ0sf#P=?8OUJD@*(&_vI5+eAqCao5J00rb&|6*VsqgY{L z3R}A=t48NXd%i;ol^_Ta{7p1SPZS?P6I7)42oxD>p z`G=7vZP*RCnSd|)he0+q6->uLc3`@n>hGuh-^MHLiY70Fy9?nWJK^(&#_Fd~dahs6}?M(C_gQu{-u;+UiGhiN!Y2=(4!M&`V zV+sHB`?PjT7=Az4q3bXiV$r5ijQSisQdV*4AO870UFcm&%YA9^J2~4>-d{ypZvTsW z`X5>aTo8nQhQaohAzC%NoW*2usxsT&R(^l-l;7bcNgFkNXyGk1*;a;?$U) zg2z$rcgOvp8T7g|N8?&sI`}gl}V^$ z0?xxv{{j;5y0Sx%a`N4aCpUi4qkn&Bl2^}iXn$4Mj9(=&%dW^hxX019Bhzbxjg9bR zXCJVfOYE>r9>7Mj0q8UAbb_A~a;Zkd)9B7&DKHkQ5YUv&G$-2LBs4A69j zB6*FJ3tm**80Fbs%4Jjsn|V$KpL1;a_jN&%7Q&-F6GLu7HpI}a`6w4p!=%||g}iHW zRg(YjV~t<5>}A)*^lH!Uop~)=H9pF8{jDwil<6tE^_n0m<#s_~8AtxzVB5^h({OUb zargh@jk}B&k0du!Wjh_wx4rV~>SgDVez#*uvp5V^4^V`EKP%i{C|YG-b@@i|er@K* z&a?N0Y|YGkp7+q-@}u9+PI$zMEzYaTh(pcAz@x?twF&Xr9T7jY&fqoT;i3fAelYNi 
z^qf88k55-*?#;70{{O9DigaFUQjY#0Ka;*90N&~Oqv2lrL}$8wBb@_O`hAqh4Fq|2 zLd`f~37ItU-tH6k6!Cgf*!tE5rka1Mf8bG*Z)UD|Xxi?h5%2?U?dvX#-d%ex(%y5w zP~uf=w@1U!J@gX8Dz?jWwLs_`!>VxMe^*A+aCVVgDBi@1>WIRYLiID26`4QRmY8=y zXs%Ix&zz6&-m(;TFG51Sc{T+;;|t<}eydB(WvCdXto&9+T-D5`xU_Tt`q+f_|8z2h ze|0jMFoP@^1}m5Z>7b}w0LoxGX8a#~aE*x^5MTso)G-)z#{n7zFF{H(^jt+$hQE^M zt1zp^w)N`)*U&YW41+b1cW|)T{7)cRs1H{bql}`s&RWy++CH}62t6;dCKA*#QocBM zC|Hzy<1+N*zBAx77blYW=$33oHKbxH!*DqSwHm5bIePGn+z$^(A>4yxTtKXL`!e7T zj5sLRy$L~xw$;$UAQa&n3s3!kwq9jFbCX!cPnZoe;!%wsLQ1bfFC46|BFu_-v<-vP zRuiY@MG!WHWJrAg+OesvUwzqmc_18fnH+J3kYz^PWN%gD z`qQzWHYph$jAb^X5b&0$v2nn-?qvH$$SG^sclkP zWx)lB4rH$MqxpVa#lt8ZiKX)sq{Y)aO;t;sJ3St!5?=#=lvxaR46;E0&k`4EftbTt zaLz^E^Ft(Z9@KE=+I!fbeP$zg3y zWL*UU$?oF^VH~4(bAnlz8D}*s;+xp!r#Llk(Un<

VNj+VpwX*AnKiTkzSt_?{|d zumqA{~+p+Mp4sLW=InIe>i@&ERM&-JB?a;5#2r)?lc+aUsRXYRY zAOe)d7cCtK!?tR1itUi+?hK}3WMj8pAnv{fgaxt(5i)7O88#$q6BwIeHtE1Qod#Ar z4;GRjbRaekB;9uK{h=r`ew+lt_6YozhXe7kD+XS`@QndR3vbfh#q3F!eIj6_ z-;X$hfw#QBamEt^1Zy-VG#vjZ+Cnpc*oMP%;Q=DDpcIm%pxFg3UIh~)Nv$$MK9r&R zM3+9&A<(&<@Jy2)mmIC!5d9^{75m~#_#wIN#{!lmBzralXO?Rs4>!rbNgl3uVT=O| z3M?Toaw|MK3s5N)dJ#(N6Vwwmts$Hn34_RdGTE)uD}>X~&{z#0?43nYex*#o|9bVe z>er14lJMGjj`(g3P)`t`s=aj!uw4Qb$}2Fi1+ZAvZveo^Le?>oe*!>Y_nEennCJHcPSg5n$#lm6#Nn}Lmwh5%^r@kUNTB%B=(!et=RL+0XRn+pgS zO9JtKFu;VV#JRbL$WAu^q|bWnDq8%w?-k=kC*L>Fc13zgcW*7(_Dew}X>T(^Nhz}J zD#&U@6!p50*tjR~H?#pGerDbk1TGL~t`kNWP_lAW9IiPMV(Es+2T>MLV+kN2Tg?CXKgYr2;4f=Og9y zwH-@|Os06tc+RUdD+eHooAQ#$u<;@ zr@VLsIaY+%f8YooIN*B))Ws4EH*yxZ#MU^*o|4~e#aRlYADzTW{PE_`98w+xOOQR(oeL_eAM{6e8b-v~k(7FZBGKudv$F($eH@Q_dz3E5Yavh*6MD zVIa7l)k?Kt&=*{c|Kh~4{#4l;@X|5PLmdxf&seeV%kJIn9JsgMK!h-2IYTEVVWq5G zeEX)+X8ZpXx z4PWmFB$Tf4hvie%#mw(fP(PW&;Q_VF4$2b&pz>PSU8s$Q(NjE)wq9Z+9#Su%(gL@> zXGv`jdV3N+B>;0N)SDhl>%Zsg#x#XPaHK%e%p+)1r(aLzO}57l;$n*Be$gnv%!Xld z(~hvuNL?#@f9{*7sPCK1{J0?bHT=8v)y3BW zOV$6J24b1c@*zi%g{9o0`*V_hbmn(cYJN7yvmr z!Fh`3@SV!q373r-I)D?|LVZorm4fpGdQ#6WKKqVzPmx7?C$WL+h0JRkk;BUg#?ng3 zN@)NLf;@H!k=ol8F7=SeAK4k0uUZEA`s&lsN*}+^pCJ>G9IK8 zd=U4JW783Q{gZ1(%e$vwMH>eLRnjbGYlP){7#JX&=)Da*-CO@a&RH=2^2&WybguYg zpL9PUApcglB4BDw2DHOtUh!7`&;%W&wNoB$QGM%I&yY6DmWeGF*w|!0r9wcn1~?)wtH=fAMOu9=MbT*r*5B8Z<8}&wI~ADNoaB0yV-i61 zx+QY_+$&a`eIivO1S$vgJ2Emc0lhu}X15&vsBM3w#}0^?jBJUb@e12wxvB_5RE`|g zKW_q#A{+rWuE_2K%UKxOuG~B9&{2jppixAF0W=$kye*?4mIEesb$OQEGTgK;PM2U! 
zvY^lya6Zk!(MWT%;TBcMbrEgtdfcQ+bI5(wj zIb)3(8N1+3n*Uk*eQF5kO!m%WmTXGEfj5J12Kp0L_h3P5lL(rAu@YY`3Kb|>w9<`I zAa}1qg*`B;H1CVc4q!!-mOH+)$6 z*ir>2L>wQ2fejQ?U2|5>Dy4}(>P}3KydT=K;a7-a1itvHNgHnFjZ!D#i4m9jnvf#& zn_HTDVMl$6YJzImB`arp4`53@Xp%;V&`0H7G2_G}RDtUcCY1+Y2X2qPT83?cFb-f1 zf*)$kg7~X!2!{ckg^3&yGt39Xy~w@C`Od*<-tqnAws+W@(6K=DoYd>1cVe{vp9kpm zzQYoALLI%eXw5-xjru7qM-asn6lR4u9QAvmoa1HTp;_<-2}du9$a&wCVJ7XbwWESZ zNJwj+3U}XdJhcV=8?kORzBpw`-v#*EZk4NPgZga=;i{OwI|JFIv8`K*A@dQWHfoq( z1Sf=hnl_Hx66MtnN~$$LM=Hvzv{LmxV$%?q>6#8wje>LK-|v9iRI~|KahdC87K9x; z8gTD2uqAs^;1NJpv_q~AxAvwH%b?)kC5V)h)|fVliQlBsZoC9!Wz(2F8y{0MVinqJ z+I5Q`#TK`2v947DtDXwSygyerAoc(aw1Vbk#ViB+`{$pPVu|6f@nBY77cb1l#s(R` z1NC4-DG*S(&?Ai~80v|@_vFK@EWuvOG;O@@TLG;q99VL*5y_#2g@p+gu)tupU46WE zt30|n^?mr=*{U+Ej=*SY0v%YN#Q=N&qUyJ00`A+|PvdCFki3XU1%T$u8u8Ynr&!7I z6&-v0?vu4DM>oDeePAQ{26=aQ;Vw&;RSpZ>?i0=!)y)uK(GNqoc&qL=fDy$Ggnt`bFjvh9At(Hj{R#d{<^CfXh{ z!{9K4e%sfs@YEt6u((?7|bt+2rRA5E=9 zA|J`o)E7jf7UJ6`CPa(6ad%p~`PkS@K~qI5WaE=8IO|s90>=kjEvVXL=G510;8ItO(!&EC;W)BQi6Z4>I6+@SFN zrpWrBRV|@}xKAlB;mO}(SF}_^+^$-%DzNP4yS)m^?O?#2I(-dKvF(cUxn|mW8`cLZ z!-pR@T|CLLYT@#S?byW(ymkGe(7O%(|5yqcJN9P&G;)!K=I58c$M0z8)xXl%&n6sb zEF2BaxUYY=8kiQR{@%^2PE@phN0OQ@-m80hcc~8RdI+BZzEQv9*RB|7?1+J%l8;W` zU}Oa5ycGFM#z>y$FUuaRL#BS$ITETNPQat|rQDd`GnC zN*P8*F1O`44}v3EJwYsPmQg*$#tY(xWCm{yoFQ+c$3_R3JW?Pv}q~v{os15 zxn>sgJ4lbXaMbri$C+!&h2dTk&TM-vbKm(@?Jp86xJJZS&ylL2Q8Bs)#cO1z)i`C@ zMlS>oLuV&9&C0Q!&*Sp?!g6tKY>PMxUHi7w^73F;=CwaJUv!A8%eI2q8{S$s=B`_= zjnAlQ1di%aou~23fLR^;t&*a*f1Y|n@3+QTc5mlFMVe@yJ?55ECjeKI8 z9CY&vdB<;hg?&@RvOWnyaYHVX$ikZ)HozZa!94)-Bz9klE0)-4F60 zi&4OdOiN8gY!m?1L;`LZ@T+;B2z$~NF3h(CK&;L{nLqI*4MJ4 z6yoJ&0hpZV)DLke`r6>i$zfNC1Qm^Z{e93Ps2mAw_|bVTYCx%E7!oT`9F{=ZF_W@f ztdZq3uIWiqodVx9agC0GPigPcx= zl40n4rupDWS-7f;I~CE|7H0#&7UGjfK@MTl3tvI$!s>IB=|rLr*E~F87^aN!K>q4O zT!Cy$H+Od^)dX+DfhwIn2_cq_I-0lERJI`wpAMaHucF)@`}5b_>Br2sFR|f9j_DP1 zV1Bb4A(UI3H9_nOl|=ub)ouM)&njj0v2&t_-$1e&6= z+P5DzS;Z^G1|1s)As$%+5Xx0J5KK^zu%d)2a6XACLf%S)<-QUWRLH8_@}ZaP-m){+ 
zBL%x_{#?iEKm$U%!8IWtl6>K=PsJDK1F#Pkj$r5srYeZtW+=2kFy)CyWkVMUtnRfa z>NEx%$Pf8!dtnoRk-vf^1pJz}+`1RRP?9~G$AiNWt5PQuZIB zVCEYzitCU@xa>kr+#}EYb#J`X$dNsJhaQ=|uA0Tkz^H#z*HvVy?;y+%-2|_C5En%J zZed&Crr$?*DFF2htQp`V3DC<{mX_8a3{1iNM9FH8h6Gi%e~)^u3s0*^XRPDDH!c5z z|CXSjX@bH6`5=DP^EW4;efNI*HrQqlxZ0Kp2YWIhJ`#ra?p-te7xudXVdAq1i64~5 z&FpMLL8KEL3w=`qr7mTlwf7nM>mu`AP_Svxp_UUP@;W_)?&|5mj0YSgKoAU>VWth| zgoRtz$?|`Me=+1m5*X@05;|o7bmk3Tyhn+y=fUN`Tz+2H(|OX*{Bqj_qLFQ4Ny|h3 zcp!e4W`+Kj`O$Mt+U(^{&Vjz4-&%(zU_)I1$zx}TZL%z*ct7+R{{DHcb38Yq z4y4Jd&yH63!%c+1d~tJ#3p3;2Tk1O#O>jqR1?8%(*zffVxYu!NpFSl`sM>#c zm9qiM=JKt#K6WQB2qOcQHRG$_hot}Z&x1iQWHltzz5$b0uA@ae(g7)nC@KxhEfZ!Can5DO0|rjN7SpVE>L2pP$5T*%z)O*yJ`7 z-sq>Ph1Il3Xc3Iq>-E=e3h{T0pLdDB!xx`DxnFV@_m?v#cS%2e^mM39LuDLJv(-#&;NCn<(UJN3g;u`(ZaNE7yUdqb zqW7LW|5@Ls6zO$*&i1S8q8g z@vg8;E4b6tC#oj)T=%$o^=kU){l=oO?(Sto_ubBz)xzko-pY5 z`VoX?{BB@JW@+RXEfNpw#5fOy8s5iT-MI+#elMf#4S#!S|E%3vmGeJ_T%`7Cte~SovEXtaNAKFP@p~Y;y)bAz1Wcj zQB4E=?=keGIxTBxxrs4G=Kbxs)^u&F|8=3aBB9+ zE&Cgd;eU7FuKgiE=`ZfQEO8XvsoO?d$=sjdZbw)H?sHBT4FG!sC$lYW0^|Y%XMfv|Ih=%{icaYg^Z=g6d#K z{NmwUR}<|%{zoS>%ts!4zPDJe911X4qEPGl5ankZ&)roVsYJNDRb&+8<5bGYTXn;*lo zj6CC&q6$G_u_W?S?u8cje?QOV?&9+DA&`Ot9{s9zCep11XKqfvcc<<~FUe)xoWLIn zb*w_b`7ir6>tEjJL+C<1b>~h5X;bN=EMD>7&Xn))%>BZ`lGUm6AKyN7=ny0s-iCS@ zkHYA0&-gAwpc^Wx{;8|`uEbaCYLQfSF53+kM?cego$44dhI&JQwHv2)W zCOrspYez5pUisg5arv3Ff&xrIazB_-)6y&>`rscBB7&+H6jD@TDI=ly&L)S z@E5EuTL5ujTL{*JIeUt3;KuL!&I48O7gSZx(7A!Qgi9TG4X_h*7?)P&I-%JZ7mt(K z;A@Y&1|xpa<$eV~1BY=3&SUqg;GLt)N*x@6!#4Wy+nOs2$@f>PE$6`^jGo8nO@oK? 
z&i0r8`vq@OImP-EoV*5nX9G3r;)P zHd=KF#l^*7Ct+bxM+fVaK7U`k>qfu#FeFuTaJ1;byzZlEBgY?q%)S}YINvpRfbm?7 zn>ly}Em1x_)zJXU?^}fzavZ_(L+!_HQp`9+O+64IIXDdk{o<(&f94Tabz4TQ-lUZxe*`|+lY6lkfKjekPg?9eu=QV z4CHO<6TB)j6(!FD>BQg1X9}cNb4n_7 zhhv(GP(a40ub;8V2BRcw6)O{(W<0W&tv0di$Sx;IawZwL?&Y$1W zrU=b0P+TMz`8xn{FF-wbkWS*8f)AAf>)D=RlHhs>2Uz2hX-Qbj!gJAw_W7Zr80V{3 zze7Im?mc^!jmCJSw7<4y+cvrx0<(kTLO#PoL9~sH4FvLH7HY#x`tRChTDwBP!!_a- zRV7dgshNFRk#eW$etzFs3-TS>Xs6%Q0~`(b2f!tr7&(;>(C6?Q{rCy8U08Lt3T&l0 z@LuAxpviRyOa1xkpVR%dy$}ykT|2r+B0GZyYdLOLNE^66sV3_zgU?^erTQ9}!NNf9 z7G`-mf3bCR>N2^l)HFG_M<)z36-D1~Dr(XVeP4rHIrP}(0{XOr{Q2|lmQWC>ITx1> zCg*>cOiF9TYViwKE$f-ThZ#0>lK4>LLjTEw3YV0CW#=0@?qi*mJ@QF*RR7b~LjSZ| zpeqB-sdN8(|8rpkxZei0?z?m!qEyMr``HV`SPWGZmk2p4y!c_*ul8a{k(4qUjkqTEKrTuYw7m76n)r@aY(QS?0E*rqdhXOg z!IFANu7tc;_sps2g-YAJ&dSWZnJ6_5Xp&#NcmWTr&0Gr)sS*!}K){+uvDy2y7BvD? zlToN}J|0kR*uz7dz2NK6A87I;{4w(Ep!rfZ)Uy4&Vm7EU_>^@28^!;%_nu)PSqk^-r&@oZh9% zs7^LFUyvptQDCPm`K#Nf>Z@0pG9puv40NPA$$)0$$KlP~-(}l!l-Fb(f3-- zz_I4TW304t`z!6(L1@nI>m}BLUJ+ATqI{`#Uv2upQ^E1ykh~@(<(M4s@#9D0lHRv+ z$&#pKya?=7C|{`9rtrQPY8us5U_(t?NFqA}&Gqx*K2_pAu3GXj z*=x)exm0&#?UP0ixo&N3Z3QKu|3fcQ9E~-N`qU~BY6F-Jc^(@iCCmI~mW7yw*yPb- zqoa)!AJ4~l4eQGFrlkWMG&F2e6yGWg}uuEwU@#{pUk_-y?G zgMff??Sr3pELLa%&=vY9W@;AN^H;`qmq$DWv~~Z65MUaXvJIW>)GcU zW~oKT#B?nFvh8ea3&xB}AAj8oTj>b=<~C#}A_!>7ZM-nSSe2%b6H=La#ga;oj7EWk zrOjuhSX^yV;bvbnjYaV2;9Jr!7Sh#iFw?zr%r*eXQ?vg|$fSDYo$B&D7`53R^ zakNv;MlKtHIMO6IqlgU!sGC=R4t+lFZ~QeL04t7RDSGpsf(5O-j^ST7L8q6b6RSzD z4$N$ix!afDJ@}5j@Y{lm@}u%>CzL!O&_yrxIVA7x-FBjs*RP!xJF{vuvC*C@Q(ABI ztSc*dT?s$4IQ8t=X$7C`vZZ|DAX4gPF|`u&q&%N~*-mplHb=c}u9DMYjjXu1y;3y= zV+Zy=WAMxFV2{|LzVI44r^=>=+BGlRaog%bYr!tQn>fduGnyVT14LyBjZcgjm7nxo1AC?n*cFZ=Jti?iFpogman}b zr0#SVW4!?b9>Z1w71F0s&2yQ~Ei-`{*j>o%U*Oey*|;QCgFQ`7ig5sPW9_`(N9}Z1 zpp*jydm3*%;%t(C{S+k^V?xidC|M?b6e7*4s;V)3k|XR79FBn8i9--DBeTN?JrPZ}X_fCM|YMOiaR{3UpcnsaIIHr7T5o-V1Lb5dxom5#yCM zzn5Rgsi->G=V_&eA+;^jH09`(`PrxjP%~KxM+uAm^1FFJYJu_#!G|5&+gzia{V=f^ 
zN{@uVI=7HdOgZ0jdgA%gzyFr=WTyKE!_>VYX!jB~M4mcs1~kz6w1seOF8N?YcuvNEN2n>vj7 zKERhgHc%0T$?7$G+f`~pu3fou8*vtGd?(v76^0A+v=JNbh$l_cvD~fAFmbwjJtqF% z3I#W(%V7mVw1!Od-}c69Xk$+Ww>Q^*KE8xW(NtgI2oQFetcEZF83G@SFUt%sEGimR z9lM3Xa>Qekc3@`B>8{I**2V0<9eMUmiiJ8JxdvOFx)+CK_=FmHz0|N16;eQB@zXdL3c{g*$LNx~m2ketqQ)A9A3j*qH z!S&ZZpMyTlTHpXmsKGXjg|l;Yb@dcf3*JmikGYK$iZ_)F$MAPr9oZY!ukS;l_de`~ zLQ*b0k>me?Ur?||COyBxEAf=#yHYV~+9r8%gQqve1(#lgqQ*h_Y`x%#L^-yM(hY9NqWY%cwGIn1=4#1H=rB!Ei zK`h{cjRg`6h2OCmRxOY4Dn0w1txdeA&+VP8_m*-;717vorE6|K{IrOv7IW*?EqwZW zF)`g@f8Dr&P=#DP7zy)yHbaW5Q4;^8MZaFgt)T{=XkTllfiwb@qOPgiai8pW-f z1*`!PkOdz55mD1D(nKU0? z@?c2_=%7@eU#`5x6&Bibq{m5facfA|Z*_&uHVHY80M5`}$5C>&cMW#2TCobn=%;(% zKK^T+YR5uk19`UHN=r+_359r^Qv09-nVPq^DCY?@mnB{?mZf=T_OtXa8&CAk^2)DH#9kO{*f)X4A)*+0({GbE$uWy`p4F#Cac+R+C zk6%WuZGHcKDB{o3UziszT=?|qlSmDiw9s5bg9pAVq*O}gH;5|fZV|9}aMrxP?UE+v z8XXm-S;IrzXiE!=;fRBamn>PgZk_Xov4PFlqkOMlpZ}>PpXVt|Iq4s;w1_$sW5zkUGd?@jaHZ%* z2x#MS%U-?e-@S1S&n?k=j*N4MZr;`9s&=IGY$^Ep!!(`7%!XW7N33N#MR8XYp(uCs zOXrD2B$L|TbFd-`_-0p4VjWI@zCh}nRkG-x8E#{~`Qm%nBB9P=(WBTn((hMnttjx* zS(|;=x*K>|_C{(~`?#ox{(ZxUkJY#7vT`AK4+E+ol}k4> z8Wb2UdXm&*!UBA0BrWCgLAFPj&f4#MhM3%TYIkh7D_-fM0f-92b(Sj^9)YKT15 z9KTZTfXV2m0o9{wxtxZEq|7rO(%`zJ|sy1oNs)-m|qrOK~)%N{RQ)X#d znK?$-J(rGS+4i8iMt1YAFcqG40444e^$wbr0mSy#oW85?>)Ce}w_ZvsbK5vG-kr6X#K zSrKK18H?A_oG+zYRZ%;RuBTXZWglh7`uFWotbg)x*{hCF_J%c z${vtGX@{%0%XN&~gH3QO{|j*JZ(!MX8gyf1E(#Yzbm$NQzf3UPv-=8wl(D^QS_ead zcyt3lopy6sg)}WQjijs(cc4=q(GKSVqN&0D0~i2#o>dD%l6m~a%!O7SznysdNaL=| zu-)5LZCwYhQVQt#JT8**XzPozvbxcF0K#J#vw7S(O7P9|=O;l<@m!RYbC-$9xeq;P zTD1P__dZN%CRswy7`N5@SU6cn?REop2f**`_!*A6&Q~8Z3BE8>&^jg)paBQEm6O;| zO7^I!`P6QLEHSeygGE+AyUUM~>((8C@LQgloHsx{b}R;9;)l4ZQCQEc*5sf1^hc_W zxV#grrnQ!!fCx+g>7lG4UV@JpY^whh64vUeetEF*v^~BBazydiTr`l-&Lbj;iR+h7 z;zTG7y`3InslYsPuc`;#YFe~izy;$? 
zf=o}=6ATJRpxubTtv`6<{qgbjcP*<7g(>UTX=d(#vhy|VkC%$2lS)2(_gc56qUA{Y{4|H41G z%>+CxCsGSO1Ct7r4|pfV;J=Iy07BQ_QB%UWNbqn1cD^QHYPxM%$hJC%V(J_%9I|v) zwEA*^y3gfmA%N|S%DT|2wYheqGR-O(zP_y+81=*Ma~w`C-d8|-!nCkc%~zg1+oNQ& z%W?>UD1El(t>1RKJOvI0+a+SY`^g-5#m7!tb0!S>VWLekN_`G6##9vpDhitQmj_MZ zp4|wk?WjZKQiDqYp5mK6U=Yh+qV;TZ2$iwBLdX^=aXKm37T{D6}in6;rEuL;&9*xaty zf@dqbb?Y!7xB>jS-JR>rL6RwrRQL__G=+tPc$62TAi-LTt?3OL@7p+q5D&q>Dzc@j zvQjMee&^o$sG8n&e%m88N_N8z+$vw@Mk~83i@QHsK>7}a$H1A@8RN+X<8BSJN=k-_ zAyO?FZ!s2aT5w8RzDCfKc0u5TNf=_gnkD0MZy_;h*KYmj}1M@Mr+A5i_-g@2A)OQidPf?;7{J(r5F@i@9f)+3A^ke6G7D-g+k zksj4EYQ$N>*m@7uKkyBax(UZM-<|~F35K{u8nO-dmJ7tH`mG+YPuxLf)O^t@T1@=8 zjVs6UrFi{4wQ^BYL_PXUZ@zW;@=xNzT-qZxNVmT3&amr3X+yoEJXuGPoju_wv)nu+ zX_pscD_A9MqM+4pR|#7L z*r0E%R8uc|fgHKwo=yC`Zs)?hZma0muoGYc_RyN}gY8=ZEwUkrr?E0#EeM#$<80L{ z0rZ5Z^cGp~FXxs8amHq3@wh6=Pg0i_2+|sJvql0lrFZ-r%SxdfJ_cUZZS-S##B{o? z_rKGslqXm2v|^A~9m^8-f-C>Vjf3T;EYw5=pDxD>S==GlUXvKqxDDc!o!|W4cYAu+ zNb$}EgTzX=Rb{XIZ~BeNGqATR-{Z6=-=F`T;Rj>=wz^N|PpmY2@Sh7>za`{O%yF~e zBKC6v$&&R2{nIRX5Wf^n|68|CvFw=boP8vFTlLJB**tFYP$1A5Y4pIEw(4L-4D5df zi{8kC+@(x}HCjPo5ZxRn&OyQ>EP8(y%sAo6>sy;{goRCD2VACVH(T6=KylJ}P4j{U zc~m2huB4=E6&k%Rvp|js^Ka}8TFb2fE z@5Xl%DF<#i>^x^J1f=y1n|neA{k-EaCuq4#*JftJNft=JG1{+(ME3VHJlJHaOi{WY ziF2RiMWHZ`?>8cQLPn(X1QF{Zt)X4J_RhXn)=_bBR?n}kkY}Mss+y{;h%3LDh?A+8 zvHj1OF#}n%Zu`7Gzt{+lCZm5x=k`v4qkwd~>C9@>6>$GoDALf3@G*gffDDCrnIyOS z`@d(-YuAOnR=eF|_(xGOF_X*_hrx>CSnHoQRnRQ<1aZ>$&ss`$cP-qW7c>En0f9WIR=?CkSooK+YL&+^!_lCKLVU6VdZ;ANvFQJ21(boynja zNq1e<^HU>EMzC$si65623>9wc$!$^9XzL?0Y77S7K0fzKGrg^^V?9i76Yxd}D!BC# z2dW{ef3A*}LUg9b$A?9I>4NDF5X3+_Y75pdP)FYko|_$sy}-}gGXpP$0;@uXYC`J( zkxI^Z2u!|Jq7^|09B4o*9oU~8pt6lg$-reS*xu{S@mEe>Ps9o!QkMw`Fmm3hGTL+# z?J=##4o9cNyX9u5b_S)Y**`fy+q3$7j4MtM#P;ft0t)A|fPoOku{HE~^qhY9@L`~( zvE>ku`iwhGGWX7PlxI)(Wc_*FSlE)Kk#(>%Y1i z$uX+G?D;A5dd?q(K7`c6mA)hRIQk(iOF5vk1P7*syLab)DJ73puXTEK+S+QLh=Ym; zI*{cXXPT3t|IU-8cKGl+m;5gwn8w5HPxg9MQc`uqt_9U%U3I*Ua|UU4Ce;~7usoLD 
zhQk5XF|-Xt-aSxmGjwHRXgT2$qwe3=W8-I3EA?I!>6{*Y&bhwf+jmM(pHS5_q!(hfR-o*Io%MxD?He3YVB#lu~nt{5_y3vY(uhp%F)LJH;GB_5mM|EaJ3^3ZE} z(F`8dAI0!BwiT54DbSM?%nRmK0xcx+7A9tKCA*Ma7o!5lHl3IcT(Lt-ulU8BIde+g zb{aKizA7!%R9CO^;xPRxE(e;RW(SfC;Ohn6y8iU;TTLD!7FUctj2kvfAJ*3XOEBRl z%WK90+Um^v_wN&7jj0|8V;BEO$E4Y)UE5@`=|5dd|?d$A;Y5RN&<#Ox4i5^ZUL|Yp*B=5d*f(8fj1N5H`9{_ZD#B*3o<` zR%|`=$QxU)byj>b43cRJK^yIB%d9EhSHi)d7)a!H+Fk`>jaOLNB$!$Mm*}kt4ACXFpu_V{DFnL`&^h>zhQUuIPl=prpWU)`h~Y{ z-(G>y9bhj~)@Pr;&O$6q0@VO$rBM&Ie=vx69iWXVEh+iJlym!i&6=I(a26v6M(1_s z^9~k_h@95e(eF_u2ZXHF7(*t3A}hk#7_Cy%tYAjUGe+%os@_nS&w=NezJ7iNM~OrV zV_-G&A1>?LxaYpUDW(%?`v=mVRPHM3LU;drQ;Tq7j|_69vCJ?ctitzlHy7Jw+UGX- zeM(>AU@KLirT=aq64tBwOkNG+xd07*lnIDYGvfTwe*Sf~b7FTRUgz%;dwLi2eb7Es zcak0i`3l{tXB=^L4Oxz{O-VmlTr<9q*p?q}fL_cX@Z!s|GC}kifiq%9U$DyQr$3wj z*2uIEyK(SJ;^;?}z6&c)p~?2qVtj#@hu%#;t%j}Go>1V}`d?n^G#6WV@H5Xn*hyP1++3np$ zT`zQ1sfAE+4xVDKj{S4>=0Bc+5u5H~95Rgimb`7Cq779-&2%LCbjS!uGdkShBVXe= zEsQ4>61OYOt$c1+6Q$J1s99#q76%;o==)ZpFhJjf`@3r7()O}A&@(RkTDyJ9bhSZS&(oMt&>7>%B`$OHZ!r)*>?z*fCL3TfN5J#bv>*yuYNm zi#Iwer*3(Tg_E(v^d#CRumd{v2;WZw?bMO9aN@-Zy;JOjM>X2nw`&IpXg=-l9=Uzs z&LA;i!WW;ZztuC&e@Bb{qC|gj0THSxG9`AnY(P?ix@(2CZZcGM@g07$y)Q2FWLYSd z?bqmuXW`XzuvHy1F1bSnhc-A%DD|7Zqj#!lfz{UaMx~|DAfE&7Oh)Ias;X-2y?a9u z_H!j@H67SfFdq3%pqyvi9$4Q3s|iIfP!*|~C}_c`y#v`>)ti#0w)MOz6z^20=a~xi zKx8iBBYSdBF=EFKY{dr-BrgNxlT~BV4O^Zw<|o+r`uKpkEorZP^+O?}wo}4fYdAcv zR#y`88f{q&AB2&{)HYl@O69^SvG0CVF;qS|c-^mqm|yN07d*|E^_4V(YR+?%RlJyq zlN5SPOt?^d7j;Ez;hxD^7^3~y53PjTFYT(FE_>(^MO^;7ZyKfaAed~cAcisiV ze{rZ^Dk4Og-4QbW{;Bm&uO|xCpWRUKVZ+|$@cy#kNQTsNI~t8DB-_5%t_36T<3+*4+Svk3^15wxcu%tC z2WJ&TXPM9V9oy9w6;F^9`k`49wX*kEKykD?Qc>(rfx_`n+r=bco+1|*<8Hy)Cj&dZUK=qjA73P#&1a2ly9 zD_aM|uhO&Zc%FURM(SBp%KSyl;^`VXb2;dQtt)~Ji?QdVrO;)ow#b9Jsqw#YquF!d z6kuuUbb*4$?dLDEP}|IdXvz`K@LKKf+5FdUC`w3l4~6Xlb9@5b6<9=_r=mm7qbb}O z$9R8+*l_0uja=@`6DuJv;kyIjO=vKB0SE+_>bfOk8?{_9v9`M%><&~bD9REts^=Uu zIT|=Fok2Y!ELMk@l-GB0k3pg7=;+iq5arEfV-H!*PMjV~zjQpWT)S>ve@MeCh)8RK 
zsam!sOefiH>@X05HO$l8xQA_1fA4@nyygVC-(U-LYz17Tl+(K-wwwa7zfwz0ZH|J` zC7v7Iqbq%@%i)?l-JPYH{I};%azw)5zwCO#0Y+&0DTN@V2MS3hN{+_MvM-Hgo=hiPq}qKevfZEFb&zlbZ=lK#@Z$~?1kb%%C&aY}f2_{Rl5@+?=p z?JGWyJ?FQtE{WuR4k}19Kk@4|e<7Nv$%P`j`U~T?SvxL_?}&fPn|&IZz|n#k*LmPp zRn$A?3|GmR{K*Sua6Js8sN1OUb;ze-Cgt;Pn&-~Xt0^}JmnYa`oF`FsN4lhkg1z`# z8$;=GY6%9kr@9&Cg2w=B`BwFe4i7Ak(z+;b(_lwU=a8i251Rj*6)ojm?BW*vOGyXm ztSHq55I_TNg8+JcPNo93lE919hxMB_ZMuarNdGCex!V6y9O|&4{cF$adOq(xheLwy z=%YxPvaR}j-0RCLS)6QcG|8zxK^_vqD|A?m%||l@emk^7=(*iL!CnX3?|{9PS1rK- zeTDtuYR1DDD&M_;^WnqFuR?DM#sBV{RL~W83#xO56qk$+_LRMRnac?@c4A+<_A?<< zN=i#f|E&KrvR>rDOnv2y`l_^S*W@Ghl5j?8nkluGr1-@y0s6@8(MA18V88n81cie- zI#aq+r}l%;l%bw9)`WD;$Syvv*2b_XeY+EtN`;6rltt$CePKhrk4|-XTb@yvjLbn#+W~2jmZw9RXDs zIO80n|K4r6SJN{HPivD*Pvhs3sdG3J2#3N%Q3}0+ZDB8%Q)}zCuY3^<*wo9gNoy2x z(4t*3!mE7$BuAm>FUlh4f_OBnTk&sA2wd995N8AZ++-!jn;rK($!t@+-V_d|b^}}}5)#Q|EH7c{~=c3(i2Z`1` zJMiw^#2POg*3N=rlC8P9xmX@?u4ZuNmk3`q+XpmA4=Wmn<^&=)2csT4#u~&@lUbmC zGu+QwSXgKT%V`HW9zA;Wf(0B8~Z4q_-A2R3>(2sHP`~whIa^q4f5rWOTiYOCAdjX zc0p*x0A<|>%bMZ+;3240EZ2@1x+OHF^CD2mK8+P(op5|#bwm7<^@~8K zgyFQK)8)H86~$Ljp8So{9MAZ^fmX#f!V)!|NjT4=xj%pN*|TT31AKrl-CD_z+a_O; zb3DHh%Gf;PrK(+%IRJ#nT4ppZzt{o7r<*aV5A*k*O7%@b5) z+qdxk4nyE&)j9EImYvn{Qke9ksMoR5ht~u*@u-t_=rN%O1?%0(F{EZ3&w}Qy zV0!Cj9(UW2!SKn!HDIYbgE6+EwH60EiZ#jtMlI-7YFM>r-!^l_6f1*4W~T<=5FKr-*1Q5R78M*p>qNZ8exaP6T!y!#>xWqjn~jq!L*g#U_W6}dhF)21?YwChksPxzn@zbHh!;Un8;>;-@l!zj}Dp< zmIVmh$QwZB^92M2z3MH3#(?g#9~?eMj=1YSK0cQ(gJ7j>3PZbSwex=4mKfz5L-Tf1 z=h)`sI2~kNOj}wTfhlM0gnzOPc`y5O`*wrc`dW8U&hgI6@=2?03+(<5u~T?Dma!(J zOD!w7p+3ofi|5-6geJ5 zo8&Rl7rz0ej*La|Qh`2fN~2%GChlv%;D=N?XklY(=Ye*TJXRLk^$*^uGXQ5cc`)rE z5NMzTpcRY_5XS=s@;%fCBIx<$Zj%XWO+UqvxT#0OZ5+&Zc_9=8Yi_keG2TEZPR0*F za_|oZ3Wknk&wfz_CaH~J$AcinWowf|i8x__Mw zpbL?C!w(Jflx&n^N6?xPF#%`E{-9nm(XeFL5?tKSrxut8HNHULqu<`%Tq*Pc538kP z#(O;EnyW7mBqt8X)C05a{XecF&qXlLt_fk^UddYoHw9oscfV0IddqBT{Y@l3F&SJK z63OgPV$W14HuvbSQM$jBkI%XxITbVH52b3+?T+-fxTj-5Mqs;E4N-*KF* zZuBUfXKG#b*#LQ7Ds%@<5*asdf+xBGC(E$iZYWPccI{PD`^>uNl(6mO$01nN3>6=v 
zl_te+iBVqc@)J`uMlT`?8OyayO}^Su(H%hGpj+0;;TVy(6Z=`a15r$F5(I6r;{QE~ z5$>)lHz2w=n1`dB621x(mp}cz&JTs(ke%*g9=ox33IwitC@lM*0Apx{&|1l-n3s9| z#Ib~x3>O3kh8x(m66tBm%btr{dHqDG5}1;)C~)>x5>qn&2Oj4IrFc)LDPuN(`e%9u zrs;=B8GE(z-YeS7!H1=SpNZ8-@PmzuJik8ilEs(j=K*%sFu<#g(DqJ1Eyc-;>aznGK_6k6F)MBf`Ri=MH8w=z| z0Bd7~j@0>-A_gtXvMhAUr?jEZSMM%@igASK9vF_$0qzA58^}LV)&!RP_19kn$Bdcw zqDLZPR0_k-8-!f$V!06+sO_%>qkX%I7ig?UtMsv#Qe{pZ)mOCClbpZ`i5ueb8i1~W^jNjkVUK+_mD4u& zoKlwF8{vC;w-WM+JRC~B_wZSn>SnR z>3wam+wg^fDRyR7gb?|u+Yl)E@nx}#X?;Bd`%Z=(-t5~drVyO0#A@>$54eWvr(KoQ=|;U4ZQ}jk=UR|Cpi#} zef#PmnME_J%W#WTYVd^uTKA$)Wm?+r${+lNb-35b{l~>uJ$Yj94;cPj405a-3QQgN z)AJAoQry*$!&G~19|k9p-ki_}JL=9XWs2A_jSLOt@Mtt1^qDK`=UQ7NKN?`geivj1 zaHp^pZyfe^Yw4CtBD?A*s3~pbEo#Bt&v97dc^k}5Y)9+x{=~lT>hU4>0F45B0y4lM z+uR?Kf^!8Y2@qPuYShFR6dDQJvqc_gEoWl;I#8cvUSMW!>tX(mp12IS_nfiV6nS;P zbzJU%H6TW32i*khTv)!ve2h4DW^D@A>qhsHJEd15sa0D&Q*SQfX`Y#crV&luwKTAp z#8hPo+Q8rd1ABg>$Cz(%a%3}6^HTpv$ZM#~0JQg{RQb;BLtQ#X*-r7zG6?6x zV=!vgzmkH2lH5u&s@exIWCE&w$BrHA*tGGUI%rOPCES^4FOi`izlZxTm8T&YZy8=X zM6PG+tVD3~dTex$?YSIV+Vf?OU{oApl;e{Ls7j$pgD2aRcyJkO>(UTrO*`yE8 zP|7jzL+ezo{XJ>QsL*!03xFn(f2_Pcpwp68lXeFt!}kyZurH4;Vx!c|)`*1|2k0Gc z(;CIZ+dN+>%85YiGPT!%Pe_chtb);<3#aXxn0aTR$a)%NB2k%u`rFcYB*lQnSpSp$qzJ=!txfa8H~TguHzD65mA3 z$;|2x?N4&V=y(76YZ0hd2!wEGj7HpSD=)UL=~>~G*G1>}8W%=vP=xDg@FtULE&Ku7 zMfZSwN8{_)hI+npY8Ql@=XpyWUD)3vv1VL%PU|Kow#}j_zEuD5qwDf|MB=KQsog^n z4kGsK1cQD}!DeQIT%RRKFiYYV+_rsq{Ucb<3Ja|ZTP_v<3Q*l4AEEQ!E)d^Tw>ml-nbxQ*M;lG z^-~@j>>Lc{NxP0fcyN1I%pTN@?0KcTRyFlX}V z;?TsNj-8SSUXTt~*G1hd*h&kRblT-~7vU^9H$M!>cgw^sfuY5r@`<)ojf2`o)vXE2 z0nJ8<1C@eL?(YlJW0n@X;s&7+MUZ zR7aPLYQ?dty$vq_47rCP-UnXwvb1!*sg9-Y^URtz%}~CnZ4npSV{Rq&%(R9{K=g4@ zJn=Qn`s!a$a3vhEXl4qVq!(A%rF}5O>!PBW{|ZOhu8G+;pu;LnT()7D3+`3+0y3Xz zZ2PI+w=Y_aERRDjGv}8EF-1jB?5LLR-(e(8ywl7;T^yw4etN#J;N=p_STVZsJJeY_ ze;TX1;asHII@D1QrzG65u1W_IB6*V~nG|cr05Edm_JRnVSYPqUd9>mbJ~7l@L_XV@ zBeF?IsL(HR7mwDSqMM5?hh(XA7YQu-kw+2vp8$aU57#g$eHViA-ITo=b(H#U#_Qt< zZQB2>_+6NWK-xa`$(ETYf`H>g1&FjvqJ1nF{%{wYkpdH0upNS@b_eh?L3J~1=qyO3 
z5P?|zd3Fq&bi!~eVKO(#A(?Dz7kgs%(o!a1sDDYkX&yl2Lh-9t(w?8LUA>CNDO?Ru zGz;k`c7p2fH0dn7@Yp}glqO6*9odoiqj(7|r%Ofv5=qrbbBLi4mY)946%`fKMC83# zUzg95%v$vftzFt_P0$*hlFg`0bw4ZoWyAhk;h9}fP=N!9uqFyabnMWXC$+wSxh(eYzNp7ltDM~kf-n9I z=|C9(^hq*LT5S_3nG37ium{3F-DWBZ5ax7?uk2E$Wd2a(SF{9?y+|-sz;;|BjH3Az zC`X?T!HJZG@ulHykTXgcE;<6N*E{;}%x9*7Z~w}MIXz?Ox~kZ$=7Og0e;GKu_$5y`;r zJ5-5VLGKh(;@$fNu9(w6!bnn27chuXSZ!_v1f(}TgA^(Uy#oEqg_+J0&3A0 zYBn|KD}@F89D1oRALUD8{^)J%d*@-+}<)@Rt= z~_s26Bu<6jxAiPV{zL^0fPKsFE3W%%He)^BqRt7n#0TvXo^+MGP(aF^&Un#<=)I<#5HlEIX$>5hgVBz&40|&WY7s|WU z_|gxr9xnF!Fo323x8=PhAcT{BfsPPg^MTI*G~?svH<&};5bz5V&X##_w)pawe-6@8 z&v+IqMEEs)l~ezr>ISxks*!Xo_d?Ut70SdlM%cCa{y*@5$9F{6L)eG6kOe&jZs715 ze_qCn9Tor+EAQqt?YPZrxBrh7+EdQY%5&V5#8nC4MItPm39$=|ZUDA-6_|2(eX`)5 zWnzZq4-W6nJ4`Y-@JjYa>G4trJQ_}X@Zbb08k7$;ITsv}8jV?yBMo}L@VM)bhD9YM zwPqWvN{?<6DDjj&na@iiAeU^tq8d6fq73YR4B5w6z6dWR>RWfF1E(1LSo^%)TbW0S z<||Rtd3~1iXMh~T?JB-;W0AdqE3ec2@Q%42o~0R(+kImq_#Y0-|DV6tdoD@EKoQ)O zd4kvHHj#tz6LkAY6sJ86Iduz6!+GIV5(}w+)gf8BN3?PZCo5>##Ve`u#2RRuj)Kki z$r-ps&LL{dXv6YVJZ_oy+{Yj|VbnK=jnf3kC0rezgEM%kJ>E{+j9eiG6+bibDmL|W z*DuGu_xsP3%g1A%b2HDvj74Ofw5UoR;Po(<&(Cx96I3$mvGg2$!YhODd(4!M5h*8Z z1+vu&yWrP|=)XN??j$KD=0v{>mUBh}Ul*|@L|Bnj_O&1) zTml#RemFl1%8>O@dTaBMw3VxY@J$`;?r+b`f=Xlb-6cUP2`D4{SY7hORdepHyMdR+ zQBpgH_OsF6X2?IA$|t)jncoHl{y&(0C^-aS3-v@QJ5HX%TP-R?_d%Pxl$kvplN6%A z9#d`&np#>+_p|8M{~lAm2Ct(hd=BKgTX6J}3+yO#1oSw`gIrI?sWNlkO4zKGU|sf` zr204ialRB8LzErE2n^miw;9j%;RSK=eFOtXuKWx8yO}6giP8EM6V(Q|q^t0;6!IBZ zJ&e2%4=d136C8uc|6?Cg*GlMdZ`~%(QnLm6&LBn!m^bBF@EkxH{Li>L=TPSi!tTQ{ z@sUvtY6u19KoGf1XANPcYq9sAn!=hPpVsX?RB#J)+d0306GZZjy9;Ef3e$(rO#G(16wZ6Eh| z4(8y_=O2!S$r9%$3EBGsvk0lnaqGaPj1)?GMmXl9 zbbx6bL2~05dD?Nt^F=Xa4iM7?;SmmDSwvt7=);u)2+J4wX|)c;^0kOEj1aQMavS?I z0PCzD0+BPJEaKmRwcIanwrQ!xo!_%aaxat}EhWQs1>?@xN&vzf+HN60vcqsI|112k z4rk6>$=pN3Yz);1LP3#GG7QeF;^r&AJLSK_MIF7o3{_y|kOOf>VJcbl@|$X|pQ`<@ z7*i<4t*3>?%`6lT*Z|h0hUpd0QnPL_#}6Sf3wEONEOLDiBn>ie2=_egC|{C{DXK#FhXJfFt}U$tLL%iy#XUy?>iDx@eAr(q!VVUZ60U%1Z&{&-jN 
z-``Nj0HLvr2%-V9@w<0#eSuxuMSfZvsjI@gXed-|`(a0@{sO#7SVZLWWQjc+A%azI zpI-j`clnf4M8T=;0Li`_NDXTjuLPCzvlbD%NKLM0-P%~8FNxkSAz;%|A4ki91jD(E zD|S{ZE$l*r#F+Xt4aw8wPrUWM(TK9i*@F|0SsC&={Nr|k zd*a*cmQ%y>BeN$8B=7SwGwQB^g}k>muO)$VNPsbzg@DOZFPMnA*lW9J>awvz0v!vlEtdZS=xARdnhE^kR+k{8 zFj(&2zaKecIT}risb}LjTyf2HNdMW^*HMG9$J+28h05BS9Y1~?2L@c3j!*X0t~G1j zX_R=5Okei=9hn^86;mf1h}N}+?!m1|uUq74=`F`to#&Z)E%R1v*(0=qk1sk-arn6e zt)zcVO+?E{*f+7WtHGqSSgmQhf`SJ+CHWET=v?qTrnqbzXwVHrq+c(61dQA?sH32N zGDj{4avK4IE0Gr2qOUQ3*n<@yYA|BFN+CfydcFdp-x`FXleaYX{KOkt1bSWCY)ON+ zW2pg_`6m{~;CUv(Mr)?g(B95zU+Cqq4QZVPrZXd2WF#ef&KGcVflT0ZlrYIGw+~v? zcm8x_`sHQru$IXUOKJw`KM%`N&^#QYHwwyY?Bu=}{2WtDPqfHxKP{6q`{;sba-Qio za_WGa#ceTVL+aT6ojcD6iWI<75Va2O`;*xJX3|fmKtM_Y1p&yfQGCD8_Ak^l0L5Q{ z>HxP1s0Q4BMFE^1sBb!&3Y1`Z`EOzAFf04Z(qw7uUskNR4dDv2Ol`(<5;(Us&}WlS zH+E+53Le>E8O|iXNYgqW#Dr(gQchC$jqa!TG|qxV$mh=|j#^lZDYi%AT5(%3Z{!q5yRMCqQJQ-RzVT&|F^(wN^mV2G&C78xev|HUjU(_XQ8aB z8YZ!_M5sJd&mXxGf#V#7=m4R~?50%Q1I*I#=QyTT^Ce`0P$n7R5jvpoz=Rb&e}0`L zjj1U){rt^cluJh!XM%dbEow2xM^HftY#twRf>brDn08z$-5E~M^<0E~9R+FAWxwqR zd)cyO5I*cL;lP-~ymr-D!xC!+&OzYV1G{4nHfGkGi!@n7D=GQ6kA#cd8xbvifL<3E zXOJA)QPQKh0Q&p%6TzF0T(s!`tnS&y#;VoJz$v(KVoNC4*!0N2eoy!=5Gkb`Lvzno zqR)eK;~_q*RSXzQ1&imlz3)eZ!@`t6>MUBPV)c%5NmB?k%5`c&=x;m4+37Y)oYV{7 z-ru+f4|)-BD+#O{{@_ICf&yg=QFRiAX)}DPSa5v%Hj8U4h-UXoZVo>`&6t?mT!Oe3 z_6?h(@6REUY}8g=$V&mjM5qIM`VE*AGLVfEI9jMgEwbXnG>O(_-PD@)9oY-sByN+W zQrg>4Dw%<_991N23F67OJG|ff3N17PE-s&w4YTw{U~yHOSTI9CGpFwa&`}9_c{fC# zzo|Z}-kB}@&Rf{#WE*=tiPnomNF6@pfqJQT6ljseITsufGs% zi)Nft!*NV$!k)I8>ND*WNeQKDk63jA>0-380mCdA(y3+K0&&51l#4mw1iNe_qNnT=xVaGx>N65hzc*eM-HY6?xTVJBI#azd8d$V_zZp zcuWORx!(-CpqxnhP#TiRtV9ndW|Z=50Z&&m#Zv2PDxh^arVB{~^lkZ+iJ&e0r_die(GwA&w0!AO|LY{!u}q+( zkTH!lRBlt!Lbu&_=_DPK$WD%?c9G1zqR+E2U_%MsFr?!YGAc_*B7m;mYCmJsKZq58 z4SX{z6U~Jd1ZZZzp-0+rss#3tu>scAsDWF6ipMyTUY~NNM`h|+BBaI+44m)==mnsG zs{t>-qUG4(O(6b>LFl7|nNBp)#WARpDd9MW(25J?%7fEm>7SJaqD`5coA3sPzB}deVhv z2a`!)UcHZJ;R3ql51MrpPDVwLwua{J&b0_K9XKb*J%saQGHXkU4HwUUw@+*kpO?Nv 
zY*IRpsHtU}Jyriq6VcO6Mw0FEig2MP;G{8??jl0ZNkn9z=uGL{(JBUssn|Rz+a#6_ zM2|-=YsPZLjlh$!5O8B92>^~5PMnW*AmvK3uyJMwW7=Rn4is`PJKI2cq75NxMiO37Gj}iU5QI|Qc!XMn zVCy?O3}~U#q1J!wAIVz#d)U+!6Rlq3mXrO_t>ys)rO5$)QJNSolhgq4Gx0YB`0$;K z0bn2j4?T!y2wJv@y7}i-a=l*TtzJ{!6EkTuU@r*8Bl#m>O$;!!hfEyAAruhc{HPdk z1&B67#zn%lPc^%Q5HnRnlt7>k+{BF_AoQR5ZU1~(2E++4JVKlVwqOE=FTyNhHyWao zBf;%32~DKGpT$7PJ(PE7Hrl_> zF``gZ5RQ0nMq1-Akm>-abXCiEr5I7B85}sUaK(!F{@})eU_tKNS!CFe%P=pD{?`-K z`y`zO^$;*Y18(%5&WB;Dv!Tu} z6j|%v5*5E_uR<;STQ+ z!Mt2mlvV+8)wE|l?5xzrTVv%+UXSJN_DJtlh@k`y+BZ{JYx}43fd!lP5zK*j#^Br^*MFOQM|tGBUPeY{?b=A-RfxPxuicIGf1W?Bz<>NH5jdc}9}$S= z1%18cM)uRNi}%kQMtmhA0hYV-Mf%PzFyd+_P^xL@h$dcWoF8SV9o+80osRs zrb4z%?F5-<9jXx3(>#F5MAak5q3rQw`IKff58x=NYKFE(smv$R4M&$e3ljW_sWO3_ z7ko@??v;^jU;YDS_*=&G$rw~1n5cwF1U)?m(&vds{2lD9FGvBh5@cfHhYt(3`U1PX zz6LNO01C6)OJ{-Zg$BH2<7?gn>OQfwM67gg ztkPKgt~!FfiqeK}5_FkC94AP%fg9sbcn$x$0tp9-5H<)E999QsTM-Eb$cg*xN}rt} z@p@NL*^%_tY8@@>HLC>$?N25sU((dj$opI@N)A$#KN~k}fS3k`1$Zb6?Erjz^)ob0 zjC$3@>sZ2O5vbD$cn8JU(wYT|L*eA~#~GB>WXq6#_z$lM+4xazp#v=`aYbMNbQy6* zsUPgUMvW)UCNQEIBIK-U=3LqaI4)DzUV{R-o*un)xR+OcXx&d zI3yHm7}?T~beE4Xn`E50$B~R1;GEIm-`~@GiCz0C)h^i~)TEFqj{{hCr zruP06$sqywcZBPGSs*CFDc3tu++FVZbeSdVAm4U4QcT{>KDI-3 zX@L0Tg{0#dA>x4WOAZd7XKeskS#Z25e=G)!4^DFuGC(e!)Y%2*@8GTRu(3(_`kY=L z@iTRJAp}jDEb+VA*QZ1i_bB|rcq_%q27vzID8J|Y4&#ji5STC-91C&;?FB|<7;0`6 z7H&t8LeAugvWa0OB6R6@hkKV9%7V;w2#$@ZQCFoCp9Cl9UFmfY&(=E><5k5B5;uJM zP`3`@N%LzcB2+*9W5Jg_$rX~>g&2<`Lq=h#4X3?_8Gw*mh+f-)L4JZM z$lBk|DBGYF`jI)0OaX!;d;;T?xETRZl`t-b!ND|Yt|2~+@{e~#sKDz zW}hvx=g)q3ksqmGq4xR1&+Uz``1#dw?mz2iT{MGZ2X_j@%0|#;o}QlIkXF0CUzF!F zvPKlL;eRAyaYK!z0)|f0lCwJ!aL3;i5mprRQ#08UJXz)N@g#!zx3=qGGLBkWlHu!R z+rR9$C=^GEqzY#|lm$3}rAP~sG&eJ%kFuJ^_34RMOF*J+i)tP~$lVOGP)aV6HZQ7~r|ZxK~G z3pyw=Ufh#)zImY5*d7MM^s$lS@Y@9Nk&N*>%`zCJZEsAcpN8tHej@oi4xWkR(u~Lw z%*k>~JJ>7(e*v9^BXX2Ik~}dY8PbLA6|>pySd)=sGIk@75RR@9g)|!{#C`wFE(DP` z>$^O%=O;FEoZufP44yz77P}E*;-|>XYZ~iz$-WO~;(I0ou^Q3{g9o&b?q_Vl2xax; ze!1Q>6>webN}?r8$KYriv0FmDGG9>99lg_l=tYP=QE!pyM0UH8=-m#uo@Ba#uxF!C 
zMy@4xU#_2E%4bLv@-b!%+&Das7?%lqgLRlOfdgu&*yZs>B!P|qF6+V&By13z?T{a0 z%delP@rLsiy*R`0;S+m))ciH1x$k|uv%qfCh7Bqk+2e&$h)xRn0`&vfUci77X1SS| z-e_+;+%qxWgHu7jIIOHQJ{^V+NA6P_l5oy>AMjwpaRG(0a25V9{wy3ocm@9Kcuj19 q_;c6~=NSNQr3AFi(+^NJaht=WEm_FM9A_9<)bN!oSj?EeG$%%$A` literal 0 HcmV?d00001 diff --git a/media/images/cutlass-tile-structure.png b/media/images/cutlass-tile-structure.png new file mode 100644 index 0000000000000000000000000000000000000000..a8fc35203d0e73e6de8e447e417afce990309360 GIT binary patch literal 116377 zcmeFZcT|(>*DZ<-MVbv!5ZI`IfPnN4Dk>rZO7EaZ@4bUqKu|yhL^>!v6e0Ak2q?XU z4nZk_KnM^*3GJ-t?{~iM+;P7%#y$7mG0q<+V{bPG$osy}v!1o)nscrl`ao5Y_BhjV z3JMBZrMtJ(DJUq9QBWKyJ9ZG>>G`Kl7XEXRw^%*jqy@G z7d&^L_Q1{KCv1G!iLuAIF9+WJ=W^QR7avT0irB>_Cwv8Tmx}F_i+%8-hh z_~zGleU>DsJIL&9PRX^% zY)(G!>_rL&?)hPM<>Us9JdIRUBarZk74Ov)6rM-Hg|Cm!PEAdv%XFq)+Sr_mlwa#s z=%hBIyCCs4F>$d5K7Oh3&Hwv5^*W9Zx*fBkpN$SG zRQ#i=#rul(0ELqr`Ek3?di8ybZ8)KS-<5}g{M4OC$$yR2R>q*;hy5Bdh*=@IcANJE0)< z#IKT!+P`m5P?SiJSG+_I=eBv@^ca_?!_0eA*cHZp=b$u#BJ9Ca>NJ+{94eY$!mk$L zld-CO7ffC>oJcxAL7_k1WEZieCA34&0-rSUOxy0=JPrKJ*%SZT&W0l`8BaFZ>MkFq zNDY_J`s_EB@q{qj;i}%TKAYeuM`#!Be0`dq=@@&@oK}ZiQy3u5uu?}$B)}wVmxOo(Q5bT(IXEJ4|{w2>({Rv8XDT!%X*U!)I$&>Wn&a)HB=XK%)Rv)lAX=zejM~@!0tz=du?(OaI5F{lf?}fa4nG>Yp@2SR|^#;+7 z@E`Q9Q;y}AnyO3-*BI9B%Y3uYjacR#Y^}Db{UB}NC2Fqct>dqwZ|N1|jXU5QGpHdx zyZo!sX38xO=bEJ4S|e0olbn!Xps$~MFm*+Fu)s)b*6964Zf@@H-;Y^5lSbWxb$42y*j#*e&o|LQLt(FFQ|W8i@qR_k7U$BCRzF?c zp+odCslv&}YX;doXKHI|WMpNHh(>(+UCFWn^*b&bZ_bO{{V493?7z{_-`}syUwMl- zzF^?d<89pHIolp*J6OQS&E4a@S5UpwdtQy<>+m<4Ygv?-(Z4;3Y(2k zew~Suqveh|2j<2q3FDsV{12V97%9}oXT?Z|p)ZeCemA+hyGu2i_!CjB)hrTDVyk!7 zsZq>JPed^{Qs%mM?mHVsCd_t24z1%Pq>qvt^9_F!c2uz1*fLX>Hy|(xyLK8egNq zU~*)F_W5*oceniRyqcQ)Zi&xBCLWp9AD6_(oSyqcb=u%Fo*Nq)=Fz=a94#+Y2%%#O z*i56O;aN5gEI=OpiIu>v6&z@ML3KL%oZ#aZ>!Hl8ek~C!`yCPqg`i%)ndC~%5T8?hv`x3|X{`^^93O6jju#cY+M z1Eks6ztVk>WcKDJ+V_*UY1YSkA-_C;kbu4*?U8rq0r?(AH;%G;O%7Y8Xg%&tSHe!e zVUw@R%WH>CKXT+qPQ3iiawF2ROZ&hBl|cC2-@kv4EjLB3CMPEs5JpY*Njl*s4psA< 
z;Yl%`D9=Ixg1WKs-u@nXUn5ROpJhZKVCA*7wKYFgFi!%61SVF%MAh^dU!k?>+}ERQ zJI&&C?*PH&5p3{qga+%@OY`!s(nbaBOtGPT@wJ`D8NK$lFrs&j%N>$>5!(6c8$HHL zL^mPS^@qDnwZ0qcxY3PDOtMDH!$VY-rRCexQEL4!t_WJRfAn3dR5Y>-KU)*fkgDox zWi`jPw;Ym{>~l}{2dk{lmHa4UXXhg1%Z=XaMcLVNFBy$D2kZArExVF8H#aL?G3!g? zqqH}i+>=*VS2bs~(7Zf6{FT%1Q+B4gE-6Oz%k}8?4FB-Gt*E#^uNGK*^k*)S>56KS z6v}vpQl8WkY3#Rh8|9gVK?m;dMn|8qd|3|5VIH_x)DjKv%5P2{qCJ!3KB0Q(_6(fN zdFiE!scQ7@5@ar%mP6{BePUrC%G+^u`X|r+Y0mpj3Z_>iBqXGzw=wejm0>eD%9HM`^dYNn#MYZ}Z!*4Sc{xHTgX_JHD7CPda9? zcG@6z;-hF4_eoSO6*aX++mcrOJ&#<96G3U(Y3!evGk*;;wNOe0RXs}V*6hP~kGX!o z9cSNOolNZ28kR3;M{qsDAHZ8!BCU6ny~}mHWu@^gG2S{X8=2-4#!IJKUCiRj7pjvy zLNmR~H-eR~AX`wL(l|fZoM)0Q$=HL&)aPSju6WB8!`-G1b-_P-aVP9s{`>H^0ysIH7}iglh)Vb^Xl-wAHnb*<)5FORL}R|)^p`NoP=Bd^9F@g-D_F1 zIh_`X!mjVb@!5a7t?SBD>VI#a;(ut$oNh$N#ygE0X~~O(2Qx%ks90EJHZPw%d2)pnFjGuS z%=z=@M-H{_!GpQDxFmY|#6l*RyY4t#Jkt<#2qWfv$G5noL{r)oo*+TYzPHn`(j|jM zw;$OWv-f%+>HX(@qZvK(Av*tn)t_9i83l|>ZDQyO<_e8Utlpi?&2}8CcsR3@xIa={ zT+AZvG4xCG;r!pGd-5RrwHIfR_M>97=C~VnPEJl%R#v*Yx|*8N$uV@tkLPA*cmMF7 z@68CIV{w`2+k|Op@Nfqf_3! 
zEArh~g6Lt9bV<+6Z5^%>jgE=&U+Ym+{&4l-fq@^^`oSCJ!!u)Ow8`zqOJkL6 zfjbaKnd@%|v&ghJ0=i#PVYT*Odt@djF_43;yB9X>h}9b6M)jRlCaP@+jLl6NX0gQq z1IJo&d4PW)t7AwDg=$JlJr?>2$;l2O4->`9ElW#FnHnM3D$~-~|M|ysZLX)4I8>2KM z&?~h;RpaZbEjr?#Jv$f^o}0^q;|~nnfBW`?ynHfptU{=?c6MrCyn5BH{i7h9Qe_ntmAiLeMMkRKzrTpC-+y)T(psK#fVK5}nnFl# zUtgEKu~LG|kb>kYk%&Vfbm{|kqs?(asi~xcHBe8VPkB)a3m={AyOeIcZgHmKYt zIDdM@Pe#k^VNozAN^{o7V#!YqC#ct%_ChoYjQwB)wS34zF8B*ay zn!-_9PXHhyB;vD%x5W>PjO-&PL_f-_Jo!F2D9*#vn*5-zyE|2Ugz-YD;_chF=jZ1E z!o$I~)z;QFGBSc-N)oVqZP7PM-frlGHk!YGPtV9uNs+5HsShAsw|s4;nf>kCx1HVH zq``c45fL|Re~v;hO;3@>*`#&=C9n~t2H1rKS69~rZ`2c2)lL#|VPk0=QQQMJxH8qk zTaPKzA!psxtz$I5{1_M*SmgY4sc4^zmf^=Lk+nOwm}YG9%X&V*at{v==lcr$B4F@% z^f?+HettQXW7ivfw^(sT z2!OMMxVW5eCm0w+O&gv|RVb^dGf@`=}WKF7~DfgrRaN}9yo#I^1$@& z{d*z1p)U^ORVNw9b=v9V38090pfv8p<$w9|rJ_PY+FzYT%Iz3rE+?I}g5qK!%iG&$ zSXfw2ol-Y3DO^m?${IaT9F?1s^Ru(FAU~hDu|ZEu8y*^JO&py`L}q7a*ZFU&E}z2- zx=jCo^N6ISF}JW-8ZB4zy;Gf`mLhlPC|!Su#6r!>|Aavw@o{>UIAJi#_sAor&0n)^O-6MQ0z261k>O`(a81Z>vtR+9Nu9D#()0&DJUoyj)*W5 za1>ZOikX?oF-6F1l1RW}`cWvTJWnQSyhj*D)XmKnhl+qr=(Hi)YMG0zn(_Rbj`FOM#uO(8ykLVO2fp&L|3;0Py+yHA~cSpCDu`t zJJlXI{i4TB5H_VAKi;xK_c9?UI_oDQA~HQaolS8if3>^Rc5rUaDx>uBrAuazLm*!c zRXBh4FtV_)*shv*ptrr&r$&AJ_#&KEk9bM9?G5}sU>D_k_e#tmTk}^h77y$Wi<0k_ z|CZ@745FZ*K(OxfN`8_A!@ilP?Z9SbRaMpQ-V@)?P^RVlN%xbaN*&R4kWE@Ojg5@~ zjb)_-RQV9GTLg@NQI#8hqE=rNvNP3b&cxZ-v89KBi+Ob%hmn@h z<61R^F^sr>;_%s8X<>sDc&mENPC(0+p>+_d2kDPl&aWaY97*?QoiCDeEYtMpN>cOi^{hungBMFY4+lrTeDmf_oUoRG{^z?Le+6BnsVvDZi+JIdW)Dlb*f2NQIWL5(M#KF%P zxE@ps6JH@Qu`+lvNLrzxp^!wn>E670b&QraKPRV03?T7PrE3=9q0(vb8r2^<30IDG~qTzc}laX^1Lz0xc%K$m2HZ*~l!q?E!qS(bW^tyIh zQ^cYyi#oIrBXm)EJis1ZxBEF#u2{g0Nz|s_eb6Wt@*I@O1jo9q2K5PEuiG-sEZ%hzCKnBWf^9K(eq{#UfRAF;?U>>Nz!SftkGvr<@P zyuJVinFK&hN%IPJ0A5s74=+kedM{6Y)iHE#g7vC)oBw1Fxz7WRz4KHvY=KYqMI&mrZpC}Gu|`tFh*l--iXkMh_b-vevnPb zkzoP(o1Fa09ZgiW_ivXG9v$^8S%BhulzW-3F8k5QpVC z(zIII+5n_oQMp>svS@opgom?;+3BL|{N3}C_~Z~@zFY&Qk!m}3(bdI;F6lZoJ*$ka zkx_n!rpB7doVm94hh!uY*`?b?evH4B+PcC4efRdIAz?bb1mo-Jo}~*7Z&!-}508zF 
z0hpYdn}eN6w9!`tRx$O)bJ*9jXU?Qw)zQ+z4I^7-g4h|DeShmd$85|mzU0c_t9D&#+vy6~n?p1!<~i7O@G z`-I4~bkF%(kE-#9Vz0x(qFHFicXxNq9zRxPn?6oUO9siNrm1Cv)$mJWGWgGe5|8Wi z^7Bi%P9%kuy@`y}vF@=@*VZ1;4by)YF&W0dJKL3F0^B%crjH*bfan8+ld{(PH70e;|X%rFTY_3@AZ)27Qo)?t5LmgjYo%= zN9%{o1spv^B!C3|5;EuUv9Ys12a4JqMa5B#Fn|LkrC`xq6ap`p=-&?yA?L&qfYBU#py>EXkNBNf15!Eq$B z?S#-OfR8yET+$8cJGo)j%6yAJCVxngJ4{7hNz0dz%5|WZ1P$O6Q*CT)ECz%5gED~k zkM4HT!As5Cn~xkZFLU+-d7E_wVC{jdTay zHj=ULYhQB$qMB+BhdYyVn}f{q0IJBHhzpW{)S7c#vwGlu1#qTCmA3{N?lL(Jwf{stgx%3JbehTLCr>dR$@aV`P@!LKT>m7AFfchVI>Yi-KL%)Bt`%{F2!MFN<`!^6Wc z;+s|P7{TI$6v*%O)v_zW;ItUgaCrcV;=rARg@r-k-iz%|nJMMu3mSv!_r z9{K0u#hcr(#`miExwtTp#{q|TV!Luzc1Q{Sm3m%YUQi^2F|#FCS@UW8jxa_i=;oQT(l|2hhsf=-;fNexw$1&Yi4`)??Bpg$t_HWkihKqXM_C`Ph& zbaqYv#(;*T+-?}iXVwcBF0iqop#|4IKT%FDQW=?<9)L&d>WqiS;4!c$1Z;6mjitFc zfJlIFmp~q&7PlXHg7(jWy!^NF$e69Avqa++;6rRIBZ~%z5*QN6hsBU(7g!Bk2y`L7`Mq4KY)E(fgb@e(<;%BbETOoJ zCSXz5pXQ_O=_LXie z`|VYOvPIDEdPmXTm?W~a;+P_A*$MT05eH0!wBP2k_8Ts&M6TUy*|r?X#nn_?QltcP z_xFQy83Vj@F=&CkW0hU1F*j50e8YJ6UVF|$n*;yIzYWbmNFIvmXQ$;fMP9GT>D6u z{wEAjkFKxiN>o7a1{BOT@e|56;6@j}0P|6dN zjBNGvr0(`nu|Ft!_#$xGj0_AYICivF0-)yn1Y$H%;$frpwq;u)UrlT?0X7G2tQ_Ox zh8q!;k{W5dLnI@%ueHNdnZjE;7FfmC1m#|P+9{_9^#48(UcPxPqeSoka z8<_O<^#O4p-FbW7fkJ~kzW(&UAyP^>bPybuFH5BK+Cf)3;^a&lpJ&RXTkGg z*@3Z>z*|#7;(q${X@gm`Lz4mcm!M%|zBcz)mH6Mt^Rd?gudc2x?_W43S8T4~Xtc@9 zh$=kkD3$mF3uZ!rWv!dmdL%CamT8@J*}+51-+zB_h#SAoQ)r3Vtq{WJPEC}wkG2`z z)N{B>@bEA1+NmWB;dm6G!xJb4|HKN^meG*RT(W z+9s0RkyFOMp8>~H3ADFmM?5r`9>tR9g!%iqcKncOK+W^M zbLUQ>aV;w(?UR@8WKR!*^g@H`EDaDGID>xWS{ioQsfnXNK0d$Xn!68 z6ry!~flRXgyxd4%4~tV-St->ezrRC}nu_`GLCV0Z5I8S5x;YW@+lxr4LGD3;y3NNx zsfxTH*+mw$x&rq!>`A4A#((5-FokR^ z2{|4;pjCrv&S~fM--g}-@aaVO#$Qq6iTyP46`-MEKP7(;* zYJ;Y6tlL^UL}_>ocm(v`dO5TS1xD2Y+oR|ubbTNs$3glts0d1MzPtsUdtX;zo4DYK z($Z0h7g53l;+L)uMQ;q61RCvfxr0i-=mjo?`-u`gv`H4UAEG-VWdKW`VqhSK$nT^< zZ*9y*#V`Zy?h}xPN~WV6|l`Rq`A*fEj8u2`1n*<^k>FtiAK+#Fc5)Ku?B7( zJ4u}HQ&jlzi zh$YO9b0``{dnP1#Zl}IRjv$65?PcaZO*sg-g*FQnzOW@Mbxt{08u~VL`kJ{f`*3ws 
z4*VhElPa3Oj7ZsL;eFOT4lWKUYT%BEq+?-Dfv8TY`I| zf8%r|51z|nfMZxk%syNI_kuZ51nDqw$Y(xyv6?+HE3K1e_q?;K;IgKhr}_T3nONnY zV+HfF!F4+`0%*tvP|Q6VWrK|acim%3pk0R;n}CxF{ujqe+anYgfo}^D{uY$!}D48M$_dI()PcXx#>X zOeC1oI0OU=!H6Zl`IR1|AZ&6Q7kVAG@>O`a|4I{MR8-Ug7CYutpy3Z9>sBMZeBN36 z=^yVlzyJZXfg9^GX*>U}pWjX&aC>u~BY9*WeLJkR18UNU?8Z6LIJgv+s`0sX3o?bf z;9O$H`aXEj4U!8glbm0m;cg{Dq%H*e(ttUPRSS*wS>W5Q(qz zb7>)aVDLfgM6yV0vy8w8RHxM)rKHS_mtXmQ8d$h1K3;8xMGBck(%X ze;(qrFPuPsyf4}n*3^z?LTZ4i^UCfPWEFCVUo(9=h(2~(vn56awE^N0;giVo0 z=@{1s)b;}VbV1gqq;_n<+D`)1OGohKfNr(`HX*4F>`m*`xx94`vSv>{0pC9gn$?j| zg}L0QE%d_CpB`CTk%2o?khfx@qWVBD3jg5VtHIR^ww)Y1-UNY~PIpxtOu zf(t9ZuWr!uLX0QvIdUc?6|hc2B3Om|;|2SKz3;m#+jOh(c?(;hp}%5=)?~~^9a`ptX*v_`$vT4tWV|NjsH3f&SrfQmfL13_ zn`}O7-2tSs2zHi<>gEtByme~!syysE?u2F1#&J5j&d$!gCa?#A4d@I|N*Ng)iIT$n z{GGHLI?xY~Kp*htyl}GDZ>rMyls5H5!LxCsnvW_ zy&pHuk}cVjrTm73Vb)3;$1BJS{uB<4-d!AqzNtpW-vq4I|GU@aKU^#S-&^ki7yj4I zWccR)Mon&48D~ zP17GZV3I+-TKdm#kv$zC4oM6A`KhO%dAd{sCj_pAQEL$SK_ZvN5f2_Z2d&O#S~+tXyRRt#8XOrZ>=_( zB`zjka_9?k&_|bkr}b@+%NPCNEsggW;pmUE)}O__bV&V-)dxX`q@<+y7r0VsEl**qk>8F)4>uEPd?Tv;U*V=4Fh*25ES{B4;z29c)bgkm5K3B= zyf18=V$?Lt0ug=fgMmXO;6$h~MtXYI0&Ci!-+`dgi4WZ0hI$S3;uy#^CR?pKV@^s* zK4CrqQvlih2pwSmxv~px3P62D6%|9!G33l#;pQeAQ^?5}vMmrayJUbm`Sp3@w{IZO zC56{Sj~m2;@#{IgTy`wL+^Usk=E9oQdrAA_%eE>uw@kc#y#zBXA9z??F{)_NKzU)s z?^UwKT?!amfRE1?eMwkYn3Hoh&N6u()Ve`NuZQJ1+1XZr-JsI2p~%|WB9v)XsD@QN zL?W^C&IB(M)GbWPeqYL7`WjenKr|9ijT34j+lieASNs9EgO4VIR6y7PcYE5O(r#)x z2~@7;ut~s{`HWHJ9_US<-luw?7usr!&@rB+;3c*nDFIKU8L)P(6@%r@HfU53*=R`u zQfnbm9z4KA>%QJzAV|qLvb83RgobX2T$kC%Y!X+BpAZjEZdKI?xf=3GL({N0jNAuL za<+x zx2by?z*yyfc}p#ILh3sh1%SLv@Hv%>2OaHdu$-x(tm)|J>cHSs zdUVWbEM@})6VwJsD~Q_~DuZ{GIM5u>Kb#Q<*5Yx~@eI(=ga84?c#9uCjGIEdL3AU? 
z;48otXs+=G=-r*2d&|4ov1^;-^f8R{Q$>?U~fb$^IC7=K0=&C~`O^j`I@o3xl z^_kiN)vZ$gVkfN$_inA}g$3TVyx4+Vd~%He(a4%{V1v^>abdOK4e$27ik%8X-6*xu zCqewWQnC=Fk)I1+5TQEyaqH9L##6ifuXUPnQ%hw+c*&)+8AdUl=7y5AR8-u2d@%7* zyV}Z2MU%!+iMi||)LK1m-y7E?Rap@%R1k(HxwBR1`h|yz-?7iKsk%c~z_WvDe!`@I ztvN{1;Oi#HxvX105Jpq=+(-0w^BJkaNzT94LtqaN2agpvV@mpsT|7;%jD0+;7M(uO z_o2d4=Sn??1M8LCq4Jj+Z2T8dOHa(yWkro<>L)~rMl&_NAIIh7juh~%_IsB1>J~K? zk(N$LL1Tot~#Pzwly`M!BIpV-s<^;gWXk>=}lm&6Q$P&>TaX4Od>e zxtI-I@hP51zwvlH5p_QfbOc^*s)9nb6(TKAa5RvH8!MaSMzpkEqEOPdw)8wKFf#%? z`1{j{KjJe`Vmdld5f~v0IxQYGtVu4;(N6!;O>$AHgvj;iiY%Ku78^ULt$Ay3aAS># z3hgAZ(|bmH$KXgkom-3=*5kGPZkmkhnSmKSJzdihZD|hHN1Z>Cn}mdRT>bm2m1km>YUqW ziZivxm94rgaHdH$`mQd0A{B^r@&gy;**I7F?v5acyoUE?Js3fobZrGI8QJv#_DSDe zdnOoa1pDV!%D$hb`uj5s4B|HZuOK5%i_v~3M@UbxMurcQPyct{&;GwPGBQ4nU z0Ga7ZMCpKdF}^VM*kcKlcJM*$0@uz1&YgYGY-rfz#{Q;yR!|_swexiK4Hr){`_2Hr z^D#PlF1W^46NJ%kck!_k|70%!-a*$Zcooby0 zXJd);)SZBsb{xNz>D{JBh$I+vxRazd zzN}HV9!o`-^}pu3E-dZIuddHEz*|u9=`Xu?f`a8U*>J!;fz@}h2=+I3 z+SlLUlTATE?@>0tWoRvp1yDg1QxVbyu-rb3JJE&UCY9Nm?gLT{z2k@4*c1TP5eW7b z5;D;$Rrle;vL`>GLE9;B3@Ha8jO@z=QO`-?1bA%gQ&UaBVNGTvv&`}@?d3cA^8yWn zIZ#M{7@^K}`7(TPln2)@FfjU01|20&9l}7)RDobGc|=2e;-mALl@_K`90nZZRg~tZ zf@=_VY_dLXklH~^y791Q@mJ&a&NjLbm|Mj721%&!t2!Jh=+XM?7G^rc(SATZgfoe@ z0=F~b>23z29ECvC?W=F4NI3r%uUiQRdA0f%HQeXZ#l=rBND)5M28aF3`eGajBK`8k zi{Ic^3mqTazD>$hag^O%Ly}3M!f&A7+8%8IfD;>5K2pJG>~#Ysv>a=R1K=d6W4AM61PxrNu^N{~l9JXB5KPFpf{cPd*+xD$ zcen-;BEXP^0&u+}jfYY>&G-MW70DSq(2ZMn9 zZM5f{sgW%9eM2|UBL+~)ft#IxbL9&9lfy@<0`hhNsr?bGh%b-PRGQy@%dxhxnfhSf^c9V-%alwlA`9wpVjrxBNp~x)x3@D4Qu`-FOzUghywVoN5jW?7r zAM`|Au6P>LK_W?2SBZb{-gvMM9Ho#WW}L(K`hOP)eueHzV!RPdiQQ*>W)zO<#St%f z&WvZJeXT;jHQF1$9%p|Q^G8Ye#Z6HCriJk~FB8N=I;Wqho%f#LXsH!U2=^|Rc3;04 zYju^t7t!!`G$SM9_N`kI|IxkJu&KKL9NIFuHw$I*J#!3Mc%pbZMQ&@T#hL$%98;7y z|MfeA<2kJ1fwU88p{aBKUlS>ls{i?W27`%IH|{h2m2WUl|8K|b?e+iDK#Nn^zb8gv zAULgT!H(;Ip!MIulSGDt3b^J_XSRzs{`6Xte*d{$v+w^;^DI_!@S}}67v*j}`}3m> zl9&F|@c+msLfp~s7cLflI*tuo{AKwj5_#?^Sk 
zg~^R$?UWVa7)|M+tVrSss;ax1ItRn4L^<;~SVS9A`dIa;7>|z92)cD?}fwA-aO4&{gpx?#FNLxY+Wk$aMeCyNAJ8cgxzZ zY{^B4&T(ZVD?E5ws$x{jeWjFX?a~p>^6#1hA1ALbq}9JD^y}6J+9Cc1W;u+>HG9bJ zr&t2A-9k2{>rxvjF@HGo`XPBtpwm$J?SPEo7|Q`xk=JDjl?fd!3GIpr?KONB^H~;C zkC27k(J0-7(pv23|85jV1YKJ%?wQ-7|QnytBZ zXa7KZmRhJ8VRo$9hK#d1*YhzwoxgnC z1I7-Gbsql8lM20V<}s@)7{4_t!?i|WPRZ$C=yTs}gdoEv{RhK5vIjWaxNOe#>>XW`A!xaJbIDuD9(2AGBAA zrE4{?_*$Bpn_F8sS7G9HJ8_-&WRkb$IyBg7^}|Z@^~a7KlL!&V3M9%0?v=w>9kh3# zw2%iDK<1_e6ss*BDR-Hdm-o^og)Ts9P^DX6ec)9jAue9J6!rf78d>+&83}010WW!> z9`X*}#W>}|?pBhtCo8DvlG8?HA|5aouOOI!UqD$>U5b26_diIxEbm7_M7|XLpRU7bm2BgMx8d_<{4Ay<4>8fw zP8YuZJ#>_P({pWHah;Bg!?U22l=)MZcd|YzZzp`W;tg{XQz^@Rug-U)ZXHh2I?Z;@su)TA7mni3 zoDGohaogWO?>pYS`K_{)5f?G?n~W508_kb8)*I2*>Px{b_>ln#h^hsu-#`pm?^d8E zk0lqrd-nWL#0Bo1^>P#45-W3W?;9y{yKLUmC*5noO_s#>9ouw zmSow|WbA!bKMDfvNTLNmiWe8&j| z);L2LFa^#=VEw_8c>R`&pWe6@ z#@=lU<~(%r^)noX;T!_+NPCxs{C&pF!lc6K7bry=zlX__vP4jqECIP=4@$AQo#c)L z;tA%2>ER&nf;k>W9s~pg)XY@fZ@9&4HyXms(Srx%nIC{Rx@Idd43%8x&42Z3f3vSl zv_vgDkTntpe=!{usr`+JT*1mF&hj4@P@gwzph)=Nb*{|G-!^$kOZUYG;g~O?7HHSO z_=BgPyMgeA1St7c#rGHQRS}AxxM8qu~Cf z?rXG8{EoUtYFR}(2WwMi_893&^)rOgI-#^~B(A7kMzkew1Q&WdhuLn>dHbPlu5Ek4 zRRZ=DVX?(xOO==Nf>YQ+&r|K2rpFi`*xTd4>j-li**z}R`|ryNCYWMSvCJ9{Osb?S z!nCN&w7sFv%_5qKUpk6H2D&xF@wEg*mpVU`C?m+2Ba3eqCAB*foNwej9vFyQJ+5Jr*6QuI>&_aJ zY%`%dqqQ43Iyt1kMeF}c^&irG1EbnWn`nTDq{GkW8Lw~jw@v{zYJ=%VV@Xtcf zCDIe`B5d-0`e@^{OJ!HmW^B3_B2W$-xe1>GEm$Q^DH> z$DsfwLBUzsQL}K5IipC|>&rZ3%Te5IrLV0f(iW0-5LE3tvL1+WE>7ePTmbsg0nQkOs+_iNR$$6hc@Z0TZ9e_{aF0~@kYHc$y#?jl?zEB9`aKifR(*vD zm3FCda${o>BV#hnW7m4C9=q9vDJ@$09n5q6KJk3u`zPGd{GLoUd+~0U{_i0Rvhb?HOcW>Tj z6kR#gPQ28fKb6P1C#H7V(db4dr?F)Wg@&fpk!SMirwb3{5&{oO*xI=6`m3nFIp5o{ zrv7@2aGAbz+@^aM)$e1YfBotF`zPu?9|O2XC$Oj1itijIJ(2CdfQiv(PpdSz|0rmM zO6=W3&it%6p&>)@Z)53{{v}#{g%|AZ5R2>3L|Q>2%A)Onuw~S6%GB2Bpt5JaG^JdO zj}?a*GmT&Q(m?Cx0G3QZ3E;o@uteDt<-XLY-ILOw?J!tNi%!J69fi*|k(fg&9 zk1H5D=KFf|-E;3*2n@Pv<$M1=By5>)S>;_jKmOBDe9Y!9Q+d%5CgjW)Z*b>U^$Ang zc48Cu);NVFhF?&MU@Q*vxks`yB<$??$ucrBlIP3e@{rGcatKPG(Su+?#;*<6cPq1^ 
z;QtAclr-LcMGZJv7#suikp650h#~+f$qLR=UA{Ch?g{>b$8Z>c1#AqVS0fLRO79_9 z?UC*a+V&q{sxQwvWX!Ek1^mp;+JOV$ZPmJQ@Zdq1;>fPQ%*WTE=lsYiG7VYL@>Z9) zdxpWdjrc>tX2QTsZe7!$wx0r@+?JnmUP|lb;A7E>I=O+8b%y!b=k6Ax#%eK+*P@Qb zG2e+>T1oC$EcbOV)6=up#)}E?i+FQr|B(2ndregco#G(X;^T1LgOVp`%nTOXIWkQc zZT7<>p3}HZ;my|eMFQ#gLi`yG+XALX;Tt3ly&D-CTAYYh-&0tIojoI@DU$2flf3OW zV-l_R*(_?ybN(?CyCCFPSW-vKL6pXCMy$;G4u1SPet#$~!D~tIyKHbpOSjUJ;1i_2 zJ(u^Xv>O9>nVlWc1s7orGgS3<*2f^De6N+95o&~o?-FY^rTxog{rJ%1rE=3(og>-*T&z2J5e%R( z4u8)}DeG2Tw}R-bCcW)*PRUXxc3Pb3P~N=4nxTQEKk5r_n~nV1fnV&iBaddWBD~|# z#<|}2a8K&ILgz<+Vh@GgNpls~D;JhfIFb2sW_lWCm#2UQN(2KC403^g_fy*JFzXyl zl|{n;*JLhk&QRWI^7{6jPNtsuK$wRai0j#V`&y~P-o_@k^BZsuTS}jAetJo=1(c94 zud5QfXNp67`*zckm{XaKpD6IkUFK#>$;W?+txM+FueXozy%8A1M`qbsp%~HMzDV^>}>zsglBV>S9Ea4N8e^UBCLQ zw2b?(G0Lqill~#^LoGAWm2DRpm5N9z+Oo{Us^=HCu6K!vW!Q$#qld?Y259Dfi>%x> zA4pI8R{VPG`=N>ETn7&YKdfTk@to=N%x+Z6!YQo90Z;RcORK+b9*qzBIr~A7lXFow z4>Qz~dxM^;c=llAy!}mu#HT%(hXxdql>GncrW~SW>^=yjs|Sn~L%9ZK2n-)wJ_B!c z8;xkQY9DnYT)k?u&>p!VOfNH|1o|)cquA;@oay~DeZogvhz_pl-MNuAtii-D0TGu+ zx(GqZ_KP9A^$5=)z6q=b=Pf;DTbq1pbvlfJt-vQ3_hJF%aSX~5n9R?^Dl$TzR60)e1)N%%}uXeTCVWp!zDamz2oOENK3on$W3$_ zNQ!5+SSk_5Hs>5q*?EY$93LQ%6=sOwLA!xprz9x%u6w?_@U!QYnPuOw0=fT$D-{Lg zws#Fm1z9osXYM}GkDPnJD;7O6eAxT3LX4=zRKP-A*`5gcdYiS~6E(t@+$?0JYe4tS z(tee4Yf^XfUNTR;Pptb<>~_Xa4r?0@@menbT8$EI25V7IIM2UP5B z6MHzGx~Ba%_TKs{>wItf#vTWe85spZz!*VrC=pOv5m6CP=|+$S5$RS@6cJGo>F!p# zF%YEtL{3U+I0^6}ZY!A`q5B}ZL~dBkLJ zgtFX*dDKEv_5~m0imNyL9)S*__Li8Px*hb+qparyoKnI`2=UAR;5{&ky99r+Loi+J6qh&F!oBOy;i^N{+_o+e`!VcQd8Vx^ED zL(*~fECyd161{7+uGO!3KuL~AHPKm;%d&X4AUrcuz)jEL4M)CtL8id|Bx4zh`$f|6 zB=)z$uPYz!P=9x1L140MfaFP{WzW5_pX!c%ikaNXpOifPeShio7+=WUI4$RTul!)- zc=EJa8zs%9k+qIrTG{sNeL)#qhXZTp6*@{j%wKwIG~dytVY%d;bB+04o4ktWZOdr` zvn%O)U2g{OH_~tUgG@y}m=grt5HZ9J0z^#AQ7B>z4Elz99)2y&acQHj<5}pIB=i3> zzrd$GaP`8I_3^*kNG72by#5S^uAZVkYw7mKQ-A1_hKUSw$|kZHrEIIBu>_RiO&Xa3;5Sl}s0%0F=5@gTvcI6R*@p*YF9&g1SUCF)ol&30?0MBb)8 zTkqWAm`Ju5Gr1ckyl(z!Oa~YmgpJ?VQ~+wW~8 
z_3|iFxBMvj2&+9urLIOzb1>}>U3q<(FJa)_!R?GEjC9-HH+{w|e=fSlQ2zobg*Ac+ z$M8u3b9&w2$NSJ)T*;O?Ke4esmP0ypLd#-&bHD1D>>9hKV^ZlpU*uK$#1bDhSd)T3 z^kzhppR!S;MlWu)3^p^d^Hf$2XVX!LU=x2oZL8d?(|(o|B4SZ~CdmA*?C9HVmfn8K zBC;pt>;~*2XxPqtDbigqHOp`7(9FJ9Y!3u0un#%%@OcTFO(Bleb%Q%5>oSjH1IJ@@tkt{f}EcnM{qWa zi8S#68P`$O=lgJs8*BpcGUcg1x+9X#UjOt*+fSFth-ucERJr@#^FCSl7J zAW*+NIlhQZ?1sPwDi0i}Q0s90^m#@vkDLl36;o4ud@BH3#y=kPcvT(~bG{Qf!4QNr zNVRh@A%QG`z@I_&SW~3)=*~aKzMs*gw7K6v*2NMw5@NR{(P!= zdY4y3M8kHorcV#T8MQTy!|6oboTA#Hr+jonRkeziEBwC311eo}?tASSjv+f17_+I< zWuG^y32a+Sdn_k+-7Y+Efsu)k5%YNjdez?_{ar$^12jl(O=eqU0I+(P<{23t2D@cE z6Me+#F$k6Y{QPK=pETWmZDKRal}e*;Wy z=Z7+~X)7+3p_`F zDsBez*Ctn$f);$*RIZd(3TL}VR-Y7DbcH4d`UC#1(83)3J#mt13;6iTPNjZl|ZGhqiC& zHQ5|FL&G72MMSBDkdG~Kofjcpv+`YfCE}Q{nY6tv4ywDeBIt zRlO@}2suX;#idA>X~w+IWL@o{-^h(?0wi;LO$j0E6*ir-GvbPny zJfgzdyWCLOQKWedmw;whtM;{F1A$Rr)t4P4&6oQ9=S`Ts%UFq@8~Q%GEA z)XrKJL&Szf@fL6u=#^_~Y6=PpkU6697&gpay^oHL{b~>|5CxFTY-5H(6z+p13dvs0|sJk5uK8cG>A0Iw+pDL-dnt!GbLu4nk--=6qW1upf7vh+^j0hn& zI>M($OhVPH!^TXci7nYP@Gnc@&rT^tnVtTYDsiFDv{IKg^h{w9YNL zGznRxzm$VN9XE1$u3plS)@7dM6Sko*>3?=aC%T}(5mPZJ|D$dKTnuLReV_*=J>u(qxwd2A~ql7eQk*g>DWV!D@_HK^Z}?7E2A$P(yP#__S#Y{?^53 z3}AXlPWF<(_2ee~4zJPuW;iXAKLjLQuhIU>3uDUfYBda`P7h1PV0|9YV=!k8D3q%@_m%tFE2is& zoc61{U-FQ1jc>28s^ybiRGWznGmvwQic}sd{Xu;=Dm*ovzm!f+!$CoLfp34jip%;8 zTlP>?HD`|dv6vY0N~J1|vwFZn*Mrr&b>vc`1s0q4b94*+lx6u#(+Lk^X%6#l&*5BP zOn-lk6)Il13ot`bGBp}jJW^&R<@0T4-E}6v6s($ z<^bUX(`<1PN??Qdzj(n5CH)lK-Syzj`Ec2OSUudlJ4>oqfPr(+GWTov{IU3BZ3Ur*2hNhZ`1V$0r@)<@l< zJ1}M1zpd_#+psQmU3O&9agn-&ZAO8KHPdHq(a^V7m@o6_=2yOPvZu33Q{TD$!F|D` zgogRcrwkO`gfD0uVQHWGcw?@?Y_9+OTr*qt@lR$=w1;xbIyysczHv9})w;g?2_n%we*a{d>}DJk-p(F14aS|ICTC87#YV~RT9bIeW0$0 znQLQQepe{dKtO3>)(4vM?9a>1EDR79g|FjCKz-NcEcv8JMq%B)9-A&Xnb+IQ`_L67?KK()DDOU9u)8);I)HJbG54_^!DU~l%{PCio{;qIPR zX#INRz--F2ctTuDkWOKq5GI=KdV#M8VbBi&LKYyTDKiP(Ud$*MGh%)ZcR1# zw65xCw8L4$Mr`p`usWr#l{HktP0xMx`SPb#61`|9k5H|B$b_pwNcYjE#;T`ldCe@` zT947=#a_+rJd*E|v@3|gFkz42lFsd&OS~KR6QVyVy$Sj#>K|zN+$cEg6Pr-&AS++| 
zBR0{BtMdE?D;iesR-a#wCQU~;%4S;U%ec7#j;jsK2E?92X(lf7^pGtYe|WG1Fdhs7 zLb?wv=OJNR69}flxEeH}luncne|=3XJtd`OQ=dm>b$Hqwot##gpj4UQ?TwSW$7`>u zzvQSBaFUU`{k*zfaJe&KG|A3Ted+md!VFukkLlZKzCOM1?J=K3nXgejvqu?LF1}vQjBD7?$C z$G<<%yN7QN$6PyUmLiy5Kexm4@^d~R->iha+GoF_{3+%y7udk|&VOOF?XxZA_Kh3$ zUq1}5{5hOwZ6rzGnEQ8xgZ-1!@4nkVODauGN6fL1o}D38I0%LN z3Cx5SYb=Nfv_Dg%lV}OiY0LE4O*3CoAN24NB~i9C#(P(ZZ~GrYK`##5bW-uV71)LU zZ2GG;HFT`I+IBu?nA!j82zP=EE&qx3m91**_r2ZHtsK&x>zCRwt5=<%oH{+V@17v> zy`^gvW<^E4ce<|wi-A<4NDUKr6;zk0?p zsL&{=&^l+i#*j2St!sqQ@`QPyi#gN|~T zz?GcPpT#`<5+%KBYeqJk$BnKTQ)GPZJ{6NI;k(TDE02=wiX*3!wCZik8-{EB>-?;8 z5;jwBD6rm6pvjGp5UT9v?_B#PJeQknq%T1+@44DIab!33sc?gI>KgC^ad`~d6AiSE zOx@`c$yCMSZ-t*D00iq#F?C;c%K^4+NB4}P}Kwmw2fwy3Nn<@Da#A6#=Pu>-qX z*ikC;9q==)33Ax=xH!N!KpoQi`Qq9uStdnw^+E_S5$_^nzvN+d=gtqz#R@=> z&l}*R`W%6J>ErdtiUuYFakVE^tEV0gC+ zmABC`oKj&wb+lqmHlK4@|Dk12#JloMW;cc%~)t3TzPUN|Jsbzk8 zAeX3SUipWmFqsOY`GL?RH5bfnwY(p)Q_R)v>XUNs-%ejY$0q#c!p@MlRCM3W*Ez4t zm3G=U9pcJc@hNjpkbQu#{#=AZt}&|42zP6s{<;6`=~F@wi}60Dk}WD|C81W^k*03Y zXfaOCr4U?Zj7(9_d#+#-60`!7AK~QU~==;VZg>x642rzvqy75oN z_FWp`(&;S*nqCWC`HMbVw9`0sYkB`Pd7Q@Sah@*o!st7;vaE)cM_DB|PI?Vetb{*@ zJ~W~0o(gq*zbfFkRmv=B+qSf1Nfpnj!d)hgFBTU3%t~1fI*~G;Yb+FgxKJ51xHlqH zfg&bJ!8A+GWi#xx)CDcwgva}w)k-fLq?pz9PqJ0nFbA)^Jm#$4cRX)%>^!pE9s~+F zMR0nf76U2U)~#d`?Oh6OD{Eoz03v;~Jdo=VDtJEq3u7O8Vv`9eTQXk(Z6tWmn-|>| zqh^+LvDK%`3`}WtuY9c>EZvnZTdlO|xvl;!3)^B^7d=&arCC#Zh>&FyCH=U@cZ$Iy zw|wUL`C*Eyv)%mV%d>U+Teg|F;mTi>TF4~_NiAa>g{}PS_<66L>T`_-pVA`?Q_UFD zudP^k(&O{t(ji0Ij*kfn2zq}wVH+2vxN`udLBT6SJ(~?knlon#(7_~R=%S8yK#;FD zHQhE;yZqYV4Y&5n_s6VA;oTEzBACsQ4(?B`Ic6(x!6CZEULte#6xrGI(hB{U z%gs29Zr;`pb|k-pcF%LC&NCmV$gz3fmE^6e7!hoI;I`1C2+2FQo*eAy3bDRMvDg}= zedDo;ul>2;PS^JnViu3we66G+N1WEf0ky`)vk1= zJEXtfzi=;u^v}FXX<9`Y$6bR%GGp1{6WO(~m~3u9&`D@0RdSBwmcrf3!NZe3O*J_^ zU5-N&@r*lUy{r*rNZ!hA$65d?T4}8Gng?WI4l|}s%R_-g@`?$25Dsk+`v4CM@**JB zEg#R?44Zgw6o-U_009*r7k9?Wi|OZT@VLg@*iC+$aE|n>6j#@{Li++c*#{a4)LVL` zCyj&nH2LeF8AdQYZ4bVu*IFEVkktBc$3BLwOp~;K@Uv9}Nz(i#W;)3$c9I!Q3F;H2 
zCYeqKk2>Er78O;r1x?Kid%a|f+3%iP1ewRXs#nKl@g!3&I6FJ5s|O=CMjr=ASWwxL zhBQ)@F(Xsg&|u%|bP5ZGC{uj-@?&nUm3@A^{p8uRo$4KLPMD*?0~ie+x3jqOeA31Q zQ3E)`W%(BP0uWY9YnKoE;ah2GX~C=`;vJ$LjjkdVi6$txr4)jkYTm)UPEWpAYzPV% zfkVA=DrfDj+_&xH)jn=g ze5^*T-7U#geW4k%nn^aDNtaeO9l7!r>cyaZm5_ziwiAqshBrjim*V$Q=jdOgBA>|} zKDVJl-J5exbv1)c)6cU|WtaPBZiTNMHgcWqQt2*Aa|X9eWEoP6d$gCtg1XZm&-Jf; zr7!sMf~JKg^@}-av2ps1t7q zh_WjxVXMCES+VDFm>?9NpP7L`#~fXHGP&oG1uiO_LeVT&zCKbvUU>Go&+nIklR>%S zSEJTZTKW+)%L<5u1V#eQXl!BAXcg0JEKSVoUfJ#8;d|uUekIFP^Sb7v+3dB4xcG+J zRdQ7Qo|>h06!27~^_YuyS`1t8j;*Fox^1g#nj$F#eD>00zEbo`ay5pnEb8OL7s1Cq z=|-g~@4Pd!LVn6cux-=_7&#Rs|ET+ID(DuQQ;1{xf=c+Gd%o>yi%PxT8*aGBEU+V9 z#Z!l}ys(mWh4NV6-Z(U%xNd^RsNtebPSZ*V``>NPCM?%2)UOr@@GzM0V^p+fNHNn_ zVRW?Qj0EGF@vU2jMA_Kc6M>Sf6$n!~KcM4MnAaSY58%y!*{bi`?Fao|n(OFTi1*$? zldp?EUC^lO+{yIeK}>ioO@{=Cqyp;?9p$*yD2=LmG7r4KQTD?9aTZ0w7utfHn|`Oq@^{!zd$E2RdmYiO1lsANt1^xZ2-e3cpvNi^yQ9(Cjv2F zjamGh3mJvOA?Wn%++liY%e21R2a`12IGv(TC0SoZXYMbriwrwV2USV%@^GD^&JX#t zOiJ5A5kIztBzJMD@Z+9IBeS1of0{(LRRk&rr|(MF{B3=CTdMbFbt);l1xY5;qNY$( zuKe!{3Fcv#P7!>+!gs1nd>dK3@maK&XQ9#5fw{~b!``&@$E-s8%8esJ40N=~Z(S2d zDx<_zCim{;Mf%5c^{w-)GrjH{L6T}a1%%fraaq|gJIhdcLoPcFP7aRhxi5{49odVL zpq03#`|bm5UE_8p&=mxpwQ#er{w?>;(6|6ecV~XmO4g~jaReWtFrqm}(ebw+?awc!|5hF+4 zxgps)E(5v7=WM+9Q2G;A%3ZkT#sfPTI$cBM-lA-a{&uMooI>%^331;ZvX!-w6O7qy zY8~f%Q|p5Mq8l*nPo8-%tyg8jnIa~!u4#C@Vd|oLgq3p*XZlmEgq@{LrcKJN3t1^& zPb-B6UE+7YS2+Eqz-`l+!YBA>kK!S&sLysMlNsJ8w@yB5i@(r8f zO+>_$#pLAaJXQ(!HK_M5`mv;sTu1bTemF{ZxY`uYQ(Izgw7nLN6gl;)U&4xo4c1@H zKT+b($sFtGE1EwxVZr7+I{D`jBfYH4S58~cu;qDr%ko|hzuLi_prF97f6wmwSKnjl z#d5l&t_sKDwwzwe5ZtUE#XdF+K5M1{;MS7v&p)F_nwqW|C!bvY`O~DT#+p7ny8b%- zHNO{5*E=oMXrd!?iI1}U5sPcuA2HL{v0%B|p7pj8yrp#VuD@?sSX?T(L zwBlp6@^kh>fAF8^{JF{!%KzK97)3YLb=fJso&&co>GDTKj``pE=!uq!&FExvbb_vN z>t#9D2kjMI3#I0&{w=l77)*MMSG=wdS6&}((Mj*E*ftfDr%B_iLCgO(zdeC=>^6!O z`Exd;$oq`8c3sZ)r@b$$v_F5IOP>F^y_KWdmZHCs7_{3n&F4d6d@zS+dY|vi!jL&j zjvSsvGoA&D>yHl@vm-zl*+%tu-?)>&dawcfB{3mE2hV73q|xDxb0+F~eGu9Tg;mQ5 
z^_<3l>SR%#sL-I6(m{&&%f1MsGKci#nFS*_=Pr#tofhxy8iC}| zcN`?!h-n2OB+M)1zLjuO}Q8T?yTtY~-5K{D~?Z>3j-VOQ2)&;T$XX+RUTo-zt;~e?PC3x?j zq@CmHv-^)WUHrKUPYa6t7Vl<~RD1byshH*1v~j6<3za$JH+r@p5jG7w-34vtnqF@b zne0qaO&vRjoRw4m!dvCObT;uy#)~^64Cz>zhfVusG^Hwvd2%B#Ls$`#*mR_&O|4wz z0v&D6)?(@Bep!r$b^`;)(pe*{Ho7OBan!cS4?q1dY-3PQ&!$0}JE6E!z4=x+pQ~H? zwC^}&_sJ?DgEXiRSeI$3^=y;>#eYin0cZ~?#Dde!-YSI}EM^YbqE zBJE`M*(oQ_N_O28m{}YMLziNpN?_(nYsEIphkfrzTHZDVQ4+lG%|5h0@1iNvurEHh zwA8KtYA5sL7Fd;_`$kOThB+`u_@fm>f*Ang)| z4so%zb?r7hF!AgC=L>pC>aeeX=OgV|9#KL}*LAzN-l=&HP&d<%R2nygT?STz_?7b~ z{^_9t*2AIbFD;dCx{XL@OA8KW3|?H<~E1p8Vgpl>ghF@J_U2 z!(UfzqZQ>fs$&=-TpFmTB#Wi-H=ueKXCLCjK|!Dn@T)1}PX=Mn`FUG*yE@bx!~gpA zyOJ&!^+xvq88sFsif^BQ_5Fu-VA6;e_+ZWN#U>Xr829UksmN!yN*TX3X+Fw6^zzM{ z;s5!&yym2T{_{%TwDC8?5nqb@q7PHuvw!}+X$+YAGyncKRi@N`K0INy`Q5`+G>pDC z!&H9Kb#;aF`x}{Kx`tf>qPxh|CE*8F=^pQHNMNVP%c=Ip zle;>da+dCWUC^c$6RF&<)F&2u!Dl2$b0U1;^MnQGs(q#M@Z9cLzCJICrdc>Iu?)_e17X^rva>yH};xeCXtCH9Lmwh|MLmiBBsZV6f!Q{wqm6Sy4V z)KaBew<-`Uto!J$0kMR6gRvq?#{SI4+@C+y-hKP*W9%_oJy&ecb$tbD#j=U4TAatC zlu~s`$$MFa*!AGFbztCjqCK^P;0HnJ8{jIC(Qr}c%E&t)sL^5|P?pYTZAY7cqmVY# z8QH8Peb#iEWm&7^-*jFcQkb1H03+yj@njx?6R2MZd>o2BhBlv{I?atp*bIJojtMYA z6$m~p=MT&1vGORus+IeYf2OQt*sP*`2EE7!$T2CROWw)D)0!kIRs2%FbxO?z|ZRipj5|H-+v9w2-Ok_6R2}U$dv>)%+(}}7t1f8oJ5Q;5)Xrp zJu8eK8#{^=KgL694C0gW{C(uJw8Ob=Qk*(9t zX3gu9gq03tqHvc=QJ$mc*;p!bJNcS00&MdI{$mF0h;d$yPzou16(|eToo~|kdHnpu znsNkS*qN34G2~{pEuMs8-g^pCds*44JGap3WvMu7h6E#39c^2!~Ed08kOQ-K)*l2}F!O z_f^Y)`yhJtSywUF6f=FNH3}JbO7p!H%_sLT;R~_Bm*&VqFm^kkdSe&_bH6#KSuYkc&yDlW^w-FbhEAz_nr)kQ-zatv-arwCa(PMQ+DG@G)!85 zF#Nv$kAHj9v*hr zue(}U)CqK4Wjby0LUw00nB|lX9?inpfbMQ>G!^~CS4_}HRQguBQE+ma@dT!f&Mjz< z*fY=I66HF6{PXeq(b(R^HH>@rig}b{tDKPb-Rk4FO$M+K5k1dUv)lYm&9rY{LXXpk zc6GIO&kF|;-kggxYcWtV&H-Nd^T((14#{S*`_Xu{QK*iKby8%oJ@wvGVSCO^EzchN zNsE<=Cb9AcxGinLXwyaE5x$q)TwBX3->~Z(t`|v7xdM{`g0)%rH}4>YNliJIU2Odm zG}?)IdABkDP*bb{T#47~bKx!Rm7R)0gkJ$P*w|53G*vx&_ayV=0}kk`+m=P}C%1cX zvjh}E&EMekz4@uj#Q9xbZB_&y1@)QIGV1*-EP^(}#Qv+MW4ID@YUpl99m8z1zj0-g 
z$KrIp(Vnq~r|c)xmg*QZAcX+C=4K;Q~o8N*Re>P+K0y@g%_ zyR3Y3y|T-ShllfNzVRNKgQqH(?;pXxIXlEd=J(nF{0+kKPD*C@Us!_YvWTrE+)uz9 z@HLJ^yeBs4EY^YC7orQU+Gr6cy@&=b~W;7vVBx)AuY|IzFECELw)u zP3!mv%C4s|1Mrw!!@J72t|V4tf675AFdM%>cpZC+7tj|T*YDId#Ufj!+?|{ZAtwLm z%jEEt--`%rRO=alTO;8XN)}uP9{_r77wvt&9oTP`YmIX>qh-8F$F5x&#T6J1Lb~^< zj=_u>wpEO1?azHQz;QdSFU-T8LCwg}5J0Rv9Kti{$IZQM)TL^^ybFvK>ENJ zFKj>2bm$@pL;GO8T8YLhVx)2NN335&_B7w=e*vl8G)oLE)k@4&fIw}aG6TZkC~h|R zTY&m}2ll9S3;_CLgV9bG4D5}^TEe|07~r(uQE1OADJiK4=kZQ@-L<4B$%Rr+;d-=X ztn0hunp)VNY-eEzp!{vt~3j7T~z3osRm|J@{{eQ;3sX z)#bq)xE(ltS?t2KZw%YH668@h6;KKb+rvfret5F3y;X?pohqzqda#4qsA0{@dL}F< zM+el?Y0R}2+>A;Z$hcicOg!Pd*5a`$_`Wk>nE-wK%?lN77ucXaJY`99z3{Za1$r)g zM(eI$bXZtFLgUH(CrPS+BaFVBh1+M7@)83%YiQCqay^keZ=O+j(aE7&>_0mJ1OQo$ zyGU`nvIo0u{Gy3405i%;pGR1v(j}#vE<1@Xel=LkpQ7*EK|_EF`v^)T&JMT-bwu7! z&NQlXoa^dyAB5hvqJlfWH$}0ACLo~K!|4$_@8Dds%ydJ@LI<?H{z_^ULG(v2!x_0Kq{lY;vGX&9cYl zbTzzdAP!t89f18Sigy=;Y(u~sI>}jc;Gef;GL3cJDMw;%01seWoo|LdEK+uqbZBpQ z#XvmYz(UuegIF7wMI6-KElE2#DTgh{G^+lp=Ap(QYeXkOqu545?5>DZF=2TOFw?3mGK>4Y7oip9~hX_3(kHSc!LGbJ;c?v zD(;-$>zuk%9wi#P%fN$dpdI82I+m?_F~0I#Isnc`yac(=ocZ?U<;Q8a>7ma)ssVxY z^HV(v$70bF#@$s@+bU#%#m>H&vepY#OHHxt)(4H;)-zqtR#0!fA{2T@+Uk^bX zdFEg5Ed}xATd0hMw~y3^h~H~P##0scx=M;x?6trDK*UojU0@^?) 
z^d9N0_pd!XwDi?Ff6-Ic@p(x(^WHq#^>@tuuLf9>Y>LCJmtH%G_;F79I;mV@(R(|=|=x5rK*;UV7JpKGKwHZXk4)G-ZetV?N-=SF1m}~x!8;^0v z?MA0weFxJVp^4ny`i6v_0-<5;9EFmI@aD=>FaZuV4(KqeGiWX8suiP@hK0Kom!-ZJ zI z`IQCd2n+XNSp%0smlIX;4Zh92cN(qpZOn{ItfRw!INN71g%?zaO{{+pdCZ=F!1qAd z)@i4)^^n1JepC9pjowog`)>0%>|Q-1R9|5Kgz34v0SnXIl$TbQsLaow`h5?G4DrPg zDR|{>8F@rJjME+)uE;OZ5)c1W{BKG}P`vABfivy6%xr@WA&h=nkf@S>%IdDX>J51L z^7}vkBL~LV&k!`+Hads+F>p-KMU2X3cGa|D`N5tVxgk&mu z2k$UBYzf2cTgLxpvXWhrdZXRAGY1hchM%=DvbSHnzirpBKy)Bcz_uf>#zZgm-*;dl z3#i1shYufO`m9=R)DXkf*^Iybh^Si;URD2>{z!ZW|9@n8n#H=?J)2v(8kQbfDn`6X zDk|Vgl1yC_&%TSu?)l~aLM8b>Stj=1=Kb#l9sT#QHqhXwM_u*BD!hOFT}l4`i3bI` z1b({`|G}Oh6$9Kv+Vu(DzbuyF|HU%#r8D04|NbZ~ezc>r((S)Lnho9duYaE?tmK;3 zM{cjM{kM$tzc?%Z{uW`z+eQ)?VdTp=MsWA3sQwqQ#cY?n#E~as=YRdlx9U-Os)mEW`v3Yx*!%|0&RFmBmDlRuDLz#vsAV%M)+s8Un8C4V zFSr?~ZrFX?Dc^>(!CYvFS*~z7s5re&@q^`5CoT8MZ&f0Ea}mt_>n?)@7A#9G#htQ^=7cC{@FjbeCJO)FVkRXQ07fsC+8!r2xvpmz^nq$ptYV1#nVRR4Dv2a6C3Xfp8 z2e-*!F~_=0jSeQJU@gX!_&4q9D-jFpx^JMGX;|;l6$QCX{s= z!Q6X)ACj(DAt9q+d7!Kcb8{hkj*=i;?!r%Z?C2~k>`G1f9o;@mZ@`_P``S0mgIm`% zB0B3R)RrkBszb~4lMh>;+PZo^c(4fDL6>~HSAM>}gk$QsOR@PtLv+;m&vES$# zJ3F;IKOUmCCpfz_nhU=Azh9BeZb`zmSNG<5sCQaB^$=ifu&2>nG<(_27gfO090 zm0-Q5>hL!1mg2>W2{nZcKM-Ilot*X(l()U2%7j7tBLs62P5mcNOtKeBcWm-9>+YY% zD-cGe+ZKtu%$q%;X{OzpqGQ{NfD*y)RCa+x(m9K4Y+C_V76kl_L~w1=qkkh567278q?c2v5h? 
zVF33H26{U>V9xyTVba~g{xT-PA?=ASEBMat*txUl@97#NxXL_tfs;f{p?n&|P@H=j z%&b%~`mAN)_Bc&zm($nU(ulp*m`bTBNi19SLb)cTPSq)??&LIRBhL0DCy^m-L!EH; z<4VKOKVrRw+4aiHsH1Wz9Is4n&nh0&;TJU2GSpe1e%zToitKIVm&rmV%@O|`e@`fxg7CGfP=Ekf1JK_OATzKDDV zut#PG-&@^^ebT<*1uK739_rzBwv21hQBs0KCs!sPZ`>6DgiUB1va_+7{g_BwydmtJ zrja|21n0UUBYbL{W(ScwDx0endep^hz1ilu(@fC%BGAx=fvoN09;rBC%Uv{s`afI!^}(R$Z!3$(`>Xkr=;u z^CqQC0SP$l6YM=c7AHr>S!B$7sYPjzx)@NSf&TvglzYYn5)u>35v7?m11A(cS8}E7 z4T$(M7cVx13R#iA|; zN?`ElbodVyRsx_Kj5noH*-mvY6V(FQ5igtYS$6;+0T}*AJu5CFL*Do&MmmsmX#bd6 zxlMaZsbr)vGV@esj&KeVz@}!abk>%_Af+n10-}uzhV1Aag>c~jIBH`{o;Z4xU`$pL zfF>xE2&B|wR(D5m=%87QgT*cpM7KHEsuaA3C(9a2E6IIn^T_5N?>VT?W zOt_78WcE?>ltc|zfZPwu=!ny%ul5n;Wqix!KOXTUwN5Oo8!a4ZGu=Rz{sTnjvmzqJ z7=Ix!ejrnRL**6oz}K%gONipJGqbX0vfP3Mpv`n~<-R^<*&))|xhG{BWIfJh0-cO8 z(%7MKrL#;aTXCc}et(Bf;yHzoxS4#wc6lhz(or1{oC>~D2e;`)FN#qQg zjJ~kCZLos_+1r|umXrlphN{XuWBC5cU0YeX9p$K)oB!anDXDya4djr@r@E`;j?;JU zlALmpVN<|6G{VV(<=$qRCql;^-Ovj0c(MDsb36>I6Q*L}kdS;NSmOT!xurZX4vz#Z zS)dfXF&KoxMvs}Q?S_Mt-U$%6H`mKImxywj=kfNXzV#^wp&na*H|21L*2CvIU7HOs zwnf-GNnSVpNhWJ#((dWZX8+}mfP;h&jSJTGQ5j4rDkav?+&{u>?t9?0po+#eX&OdU zjEyqyZZllNm@|@LrwyC}V=xWR{@7*uV(lX}&l=#L!kB<;m}BHB9!wQ)E|hmty{@{l z*kz^q*`b|Yf@l?>hFDGs^XN1TOkVS2mFO)?X(-0zS#roGO6_fjfqdfS;+jO2geX5a zDJUpJ!Z?u_ZpR2E(Fp-X%6E2Zfqy&GHYrqIlZs}pq)M#@WV%Yr^wIE3DOaVYrk10F z4RawG`lGyqG3I}6p}IY=@aK!{{*=<_y1Vn=?<#Gf?~f{eXihutebAvjqaZ5y`2YFI zQ^8>b5}mmkR6_XsBDp~YRb~^O*s)PcPFRi zv%5kywCwB_P@9;SLF}noHHw|({{0a>Q91>7c2nhiqwpwNBkkuTy2?9yw+Yv*+)|DN z2vwRjDJ9N$ohL<|T-uO8Buq_r5fSaUWoSqMa^YD6quofaOve7FUQL3lhSt_IczCDZ z=oH>H#FX=#JWsA^YXXL15haRbhRV4ZVVI55k;wysd}?Z|JN2GDgp)HhwYiB&(<`hS zU~3vf1Wt;Ht#>EHYyGy;^?HaXs4jHq>cZ=MIi`-hJE3KjDiUa8+`3x1Wd5t;YgQ9*LDUlWjn@v*sQ~^{x%jjIvmbWzc0d4b!Ns| z(98>ieUri10!ULZ9&0Lu0_hN8RT#hmu?&p19W5*@0N=JB01^r}-DF92E>K0|e$BcW zr>-d}wXJ!khUb|y$6(VSBr%`n7$6&f{0Iur6_ZRB{Cyn72uUJv$fJ*}ng6woL}Ho! 
zQeWTx0bVhLmpGD`U64{>KpZ&(_z)^eN+z_UjlED2&kof#GBDlv-u&jIO4_yHX7-W6 zK@;!icQJOmvNWAZtIF3TyWY{Ogoqd;BFwO8++AdG^(nrBU=N~{|u;zm;Kog(^#NkOi-}^YeuObw!NP)@Av^4aH1EavHBJ;m4j&+ z8QdpNwnlej#(2YVoev(L^^xq#H*UlTWvIOaJyL3VytlMC>#TmoZ-!d0uc>GGz*3+g zM8G`9f1&)0)oq%Z+~=lHP{VDP_|Osx9&yiot`tNp2w`+92V@sv$(rXIM$B>ni>-%Z zaf3knU`A`vQ#F1BfQJzgHbAG?ZVvhq~?g_f49?jSJRxdn|Ji}(T1bL z%`=QGgAaD~*a&h@8e5zJUx9=`2!SXp7p7$7mX416k`h-OmC#l+$mH0df``yi13ToD zp$zwg1=&0_IVWcxbC5_zu=;?NL5R$RmJ&QSvdK~+imQ1Fk>VOUzXqza@&0`zUS8PD z&2mMmwmyvwx;f~t0RE#kT!*woG*A242lnb{YxkTH8^_@mj=CZP?eW`%LkGB&mYa9N zND%UcHq9`h{GB3HA+KKDg*zuY0F%Tb2F3z&+l0XP@dcP-JSFCZhqOG>1>x90F0wN} zckce&1|59P^dQ$1etre<^Wu2BtW_Y~$7vXNC3h78QX$;$&n8EJrwt4YCLOlmo0Fu5 z^zhy)-0;dS zUH4tQip$H$Xc(^)5Qrd{{O18_n$&e` zd|Ev~zi(d)g~26D;r#gr%v){pOuT?s#Ec2_f3Qr)U12;{ZQ_X&0Mi%+}U6oZ;A{63Hemjbl=937@ILqJW_tIzH=F2}f z`Ek`-xa*trqsNO^Q5&~^o<08;vyxutQz!eRm-jsz#IN%+93;%eaR zXgE+0=N{uK_18YFS5yBSx^6zXJ_a}ALx;NSsN~Gw`T4BN2pVN`OeSIC@T>o@?IE1Y zN!!NLe%X!s!2OPUAB~s5M!vx=#$MC~tlOWZV?GAl2*g}f1J5KT;w`b>CpZM1tzfE+ zazSZI?$_&~!-@x#s3uGWa2S9n*HBw)gD2>c@b>LBO;Sh4(wn zVIFDeJBZn43vw!B@_~uI^1iaA7o^3hp4EAqieF#K$D6mF`&{_6iVL%|Lk#5!w&Tac z&*<5wA2|dm8Qc_r1Y28?k zmt6FKOQjZlg9I006Cgi^#>TpX2X7K@RSQn++^_JZV4x6RogPV#aB)v8SI)B=hyqQ9lJ5< zkDD5#K`{x5O^euHEtjT4+r94at$8d?i8MSN-`M8J-h9i9ie-GC8DhQJ*cN*~KYhfBkpgw$iLBD44VRIK8TSt9KW=gK&N**bvyRAj ztp?jP)P*&|mG1jGgcpFJVYTDS(8w{de!#kx=eu*wE+bMlFsK(AXlZRN|Jm;YH%p856lI9{fZYFwhexRM=nkfX2a_LRMNOr->ZGKn z*VomFVz3AgZJh6)f6@>i5>IV{2V!zmR?Kd;$LcG6A#5yzz;6yo(mulZ7d^eXL>hQK zzh>Cbk#1Na9$J&@r9I!DAu9ZeEBj&|77*h;TYV4(^X2P5>JaL5q*1!hZdk0(&Tt@{7&lvb(;54LB zhKKGpyj0FERJn<$=)kMjuiJo7S>h>3-A}~2y(MQc zay6wTVYKQ95ET&;yDZ(_F*H5hhY(MCCO4ltd8OO?NV_&d8GO58$RSaJL(>&Qbe}6K zMhRRO88+8aD6rj}HAMQ3#n&v)+eJhG_l!RLJY@Xz;upp$cerE%`R>g0=bz10k93E{ z-*~N0-Ri7%6-Y}+s4TrYU{|h*m5-ipHlhp^!7)Md4EH~BL2 zSCGyk8i!?~4cxhq`!&>z0Fd@d+UEe_Hw_@$;lok*0boyVTxgicG`qvMgX<9RTJ-eB zM$9KqpH^CpMl!3D5gBvx#ECPGKcdu$gpn#_hxd^%4kQ4f{KoRX9N8JaFO9VtL$88> zw87%Eengo?OQQKSlT*M!WUAq48e`AnM22idBmcIT``S-na742Gq9wcgycG|p 
zQ0N}hF*Aq1c%gUu_tr>MIr&?OuQp2k1;hEdRy7AmiWwJ;jf{ZH&NXTND0=qzi4$Mc zxB8mYv4T*7i)`O^Ul=$=jPrQJ^rlf(wO8Hp5so0vNlZ+{CjjOh`=uD`uR)Yv`maNW z4$;xIqAq~YhzQTCsuHuZW)Y^s0A#ar-G+o`atoO})+2J^-~Z<0n~Cw^HtWoauO4va z+R^X+xhxr~Ff%A;~6VKTOSpiZp8jQ^QW64uk zy8G2oZ3jHext*o;%VtIO*6e3a^1k``4k((f?tMM|6c;zz7C4Vke;@yZx3}z={|b@y z0zQClxx2>IoTub=r`fn8bNAwKq9bP7zGDZf50-eTsbUxT_uYpRJ}Wh2VxggJz&z}d zu*WNX;<1Zv^FDH#-S@7*w@5J;dldP-%7*ESchN8oy1(FkjxzP%STUbAF!M>rPK~7p z02p%P+{fFtZY^8fT{TjOSQa*CbF#OP>$C4Y$7es*a-5g9skSy1iyO0UkCo)ko;?e? zSy!2x3nI^P9J)8Ls8+%n_OZvQp_PGhOYZHLRaJZ+WU|P%)&!#bu zmVIR}7;LKd&>dX)fuO3-(Z~0IO&>$NcAVXUnNc_$oo}&ng@ws$RO(0s@Cd^mS@fd< zJ>dM{nG=PgU&9W&@GfNalYCZI(>QUakaxn`D0nFz`vx&zP$G}{I~+s6zPZWJNg;>F z*ZF%KtOSXp^r0V33NlI_9@U@2Z-Q;^(uRneSzEtjTt=#jot@Z|5iWr`#Ov38wTEXn zRbO`jZ~)bnR`;Zgj9wf(X5GOKh-IGP0ffW_39DilukgxKV$1)y^tacYBAn74#U}GC z{||fb6;x%qt&M8iY8wz034&m|3@9Rka`>MYd+oiePSv^CXJ717_4mc9ZkNiO-}i?tA`v72wbLli}G&W zhcgKZ1BHXv#=QaL6|v{(a{_t)<>-}-sm-XSE`de`9c&|^&`jiQ*?5ZkI z2eVLgQh_2DK+VjxLoV&tEbt{ztz9i&99z)sg}YC-KwviuU6CuaYQM5^{i8RrJyi~v z4l^KE^Do53gw3LN7N~b@oR3FOu7k;tINe(x;z^Mv*(V5;fKj-6bOxC%Z83x)ldV;# z${{HqfGs_(?_HyaN~G`9L&wu5*AY?)soQF0ZS5T7`vDYeO zWbC)+M?9stP^5*P2tSq)>4asc>WfBAZ8Y}7&MIIy1Wtq=$mrn1*fnIZ866TgTT)Lx z%fbIbsGhMy=`zWML+tps20fHIWblV?UkL&#vBe#1J;kI--uj1p+esX;nP9Js) zH82V~G3^?*3+`GvN)XpQbogg(bv~!>dikspp^Goq|(=CW_C6cdjpp(CELyf zKwlWB@OIn+I9R7fS|am;{QM#-z*SPLKdeRn?Mx#63nZCjm12F z!9sIboSzBW3$(A66HoXF&Tj&lqQXK*psjWbSqP2T3?xDkNi6TxstchQ#cN_jhpokp z#7Pu$THiz5{NwZUJKINJ#VZA%)-e4PAA~Y~Oib#~BkbnT&{6YOkMZ_?+1EU8*n;{O zO03ugp}Lc|k=0_>G)Rv*WE+t~BFI4^H8+waRdK6uvx2|uYw)aSyb#aR&H8<-`*E#u z?!oQ!yVkE;SM!3h+bxe3br~M-0JgoZnnQH<^TGCa)O8-YU6?Wk90oPTSp|j0Askvw zA8}jtiMzmm5ET66$;;KicLoz}8*#ym!r?OW^ZP*^)eAWeuV=m|;&@I>Ohmphw{~WE zxmLDaoU_AKhR)7TT;Ri?PG%-`fLDP3%=iKy77{7me7S-y88;%jF`PPxeNt^{DQtN~ zZ|-5Jt6JUW($th6(o!R7!VVo5?Avs;X+=^Qy^e8xvEX=T_;j$hWJ*b%euYBPO~Y_eY%o%sqIm zP`I}DOF&ur6rP!QNRyinyfeN|a-pocIyI>*c-N>tasjFXBwH_d4<9enq^{UN7B2(O zMh6XyaJ%(QJ*)XWk!gyqQlg?WfO#Z~ph_pK8A_(R>Dj+Ltoh 
zEQ2MSQyuPpfy{pn;3xr<8swG9YBfKKeO=Zr{ViLtx867;**^5OTZwb%jv}-5m8P6g zw=?diN0!rxaLozKGS1uXM>q`{eMz6o`C?5cfXxEy2o)^wVO7Nag6N)WD_8MgCOQy1 zO5e*HokX323mPgJt#Zw;J8Hr-82|J**fsq%;~EoabhDb5wq z^v0DUf>@5g=XP<98eA2_tp^PzM%4~hN66f`zutw7kc8aO}-7DU!*!b9{nQDa8TspJygTv4zb@ua7Hn~D2mZA=~ z_p)l(A=h*-&BHt(Nbk(Zo5qxf?O{S)B9p~eoOo4gu)m|T?Qihc9WrpZO)dUI{zk^@ zp(-7UYbbrrPteYODpyT@G8w7LDpid7B?t1`YMnm)*&?e$2xB3>q(Wb9cXg|Gq}z zg^1nds1{%`naBK^49}%S(-~a#mB%^G#ymnt1?OqJ@2~|5Ei#S~!T_C;C6p3r4`Pm_ z6#V$O0nC{!==#|?;t$1XfPpAR>#A_(YbY!Ktbq{&CM68?^6~NU0P!PcH}GQ3_=JYi z(70G4O7U|G6r*$`u5^HDLtoZl=W9jX%FH}90;U`GSFVdVA{^UoN^Y%V0B>2YLyw+2 za+ds^P!j0Vra;>;Uv%R`%#PP0$L7tC!ord&7qPnzII#C$bpHMf7OK!G$^3$Z&W0}O z>F%4DGgRf`3P{flmN??JVZB*Wl4Prc(6Ymj0w8i%r6db!X22OJxJ`w0>f$Sq+be}3 zkwkC53a$(PSy#{rDAv<%_5Ylj!Z42|+|#=5G_}w=`&$K&K%nTwj|>jEt6(HnGWbOH z3Z+`twn7ls2b%t*lM;bR!`u4Ecu6vuxd%f;`UYddeK6G5FkEP>Or|ZTSA8x7AlcZI zI1(#+a@3KWwx(23lBSA{iA&QGjtO>k^S8!A_jZMzhNCv$sEXqm8RTsI+2GU3G@gKM zeb8-$JNE_RzD9LPtPME({#x!&Y?*$Z7xNvOH9=txDTozKT?s7vBG~!*iLSK6e{U>G zTKNfej{1cQv5QH^QoX(i=?Iv;iQ+A=Mzqo0yLX+SyGKttIzgwq%@w@~XxsWO!P5h% z2{88p0B>y1HKQzN1_~|?tWN=1W#{+TR%RO$nMuh~*{O4S1DK5+F7B8#o0DWj%s<0- zjbuEVj`AHmC>Oc_SBvt9_iV%+j@rL9OFVt0D~DTU`=`LBnkB|&q43Thicb(YpT380 z8w*RU{#<{!XHrk_Oo^{vRz!i>qr~XNVm2#Ckj~P*0KLe1-0RMrCd{6ebK)t2TJAA& zyHKI4_1jSz$RcRc?Ch172n(8miabUrIhDs4AbUb6CCk@=nM^7Y+zx)V<8Rs6T~4a8|Y3*8=8S)VNKkizBgsn13D84p)*-sPLc zDZws|hE8HCH1Di4p2D^k)lr>{IVE?-eNxVhjA;s43p+86tZP3pl;mimn10MpC^^fE zzs)v~#dmak(t66VMmtTBJEB>B+C@-hWL?*vmU>w+6~VI>J8}*P_%Dfjt{BoVwMGHw zLNoVPTKKYVu8iRx-MQ~VQqgZYT_kQFcsSeYk`F!lkW78?z0GbClB)zAR)yRWBN|r1t$8b5H1&)kgukPWBwh zb4J2nhZ_P-AMUg07y zZV&|k=oG6AHrfcPP86#LAPxXSnj`hWu9FKx3CUVNAM7eJ4&yVB-3-3{JDKd35XNT* z&N~nmx1CZt%<0c1^iNyp$}d17iP;F(S&u3!KVdCU!$*cv3Fi|dz9|s}d3Sg%@}N2t zE5RS_u!bK@*Y)+@Di-CP!c{E5d$&{p)WNv`;2h4gF-(c&guuYA&p92l&$ybg$&t!L zw{1Lon8(O#H6fwV=2$|dz?F=)7)tpydmJ1bpo`^zY6ecLVlRW(*36G?qZv&CQpm>y z{f2l2LDkGXd{4u}rL8(8XpWR)h-Cgr)HrM8vI8=AZAtukeT^I}Rs~;3yvxa66gCyy 
zY!VQa;TG|l&GDh4Lo)$38L~Z$M_5=RbOB9VRcUFBjR!Dh1msjoD56P;ZiZ$}_Zg>R zSfUV_p2AOAY8SRer{AEmR1_Fr*$Kjrf&JeTG zo%wO?+BINZih)PILq3p*F|DvbLVHvYU*PJd_qly{WZ%N)-M#@v-U~nONav1bwgD%# zk~x0fUFUKqv~^I3Y(it8mdAP&07+;|OvO8fb(`5_?^%wWe(l|Ayd#-Te~<9$wzr+M z#`qt>>CO!Kds(xb&Ga3!oLw)l<*aLAckP+9BF97C<#nsuqAGG6ZpF+6ElG5A?^G`PP@3Um| z{!!&C@9|o$dRA})fm!~y_y6~o{_m6c|K*xkDygbE4B*dOgIL{F1fq8)vrO9m@Xp`_EB|r(}#-s99_nof`S76wm;T4 zvgbJ5mZT*$6_+??c;XMN*R;@0ez|Dba9yE&0XJfsXk|wiV@_vKD&4lkIjzd3rlxoo z=G*4$hsM1ri>`nFu z5a^&$X1{Xq;e!W*2MNgnvWSe>2JDByB=*mL=wzFd_ye5LxE8EJYe32+^yF&gPo8mi z&%;mkMx77OElSVr&!EL4Jp+l)P*)Fn$8nC9H0zE443dMlnA9^5SKLb%)NG&{Kvcx1 z5k)pod+%Aa4G+D&Yn{7#e5u|_{f)swKido7LuZ?atcaxfX(r1EkNh}p1R5X6(IrF$j(^`wyzF+W35D=v4q-DAG`?Ok>9wllIVLr<$Vo+peTp1xX(*M`4} zQ0Ip&%uQ*isbK=SS#DJGs0o$uYG0EQ6VLS&1EU|l8*_GQ+C^g|+vy1Dtpa>(Rm+-}C&{dlbQ67(jhV4yau3}=VkGgpQ08a~_KYRY){?sB!4ZAiR`prD$CE}A3gH4-K*-Uevo z9V`cWHf$ZUL2Ijwu^}Gq7qK7~!{WZcm{gNjv$8AkRSO6s8pZ*9afIzNblNc&)ocLk# zUitZ+rh?}cDW4R6Ic~6dk;b&Z{$_2zNLzsR?y5h>SW-jhe0I^2&NE9(7f>VALX^_&fRecsbRsucfW;*j2HQ!#j-51Zx;+1gzJ&BVN}9S(e(|b1Z|@ zO?G1K2Xr$gAzh=n7-gQ~?{~DN#AJ!(#8o789ujSgdyaWTs(!-9bo9+38;d+95#>J)e;XJ!tS@a0af`#FWnA-n;gg4_^%U) z&(n3AI2l@lP)&OP1NNh>Y9%Y#uKo$V6$$u(BO@NlmY)FcMBxkYl}@`qM4`?7EIWMo z!keQDdDy+AgtiaSIAx6kl$99@7cwgvX~_pbirFj@!$7gO*rMyaP3N!QhaEG;u7*)V z2M0DiIqWF3gbmZ2VNQB5LbTEopk%cL&UA}`aoiX~eV{(bbV2*;u*wXZi+DGSXRW4PALcdF|*T z?)MSxvCKYs_!9DcVuT7UdH!ZPtULdH6+O;AUOqmhwROOIaOb(qi)cRM*s)_C;0MXk zZ~;<&6@>Uet5S(4+nQ2uiv8=AGRVV(4#Up7Cg51)U%#&=9-hXNX`38znBKj2Zw5;S zM{g8BC1xLNFzhrE^5EID-2!u?3g*8bo`b%@T~8+1s^(=8fq#}3x|Y;5Pfe31YG_Vq z&neY$*4^+x6u3@xI5u>F1{1?P9z=OITl;LlTyw}(wXO1Ku){!JJS2i|KVDK)6lf5b z_UMlC1F?dG4m8KSyv5mJGIt#8xb_p4TG27!H;Bpcw{v5|Jg_QUp*Y7|tGgQd8Y&P1 z@fFO*ak>}4l!wNFl?v@C!fpp@J%_Y#;sys9OIk{#{=}x`?!|i;$ICzdY5OyLkj7F! 
z&Z*(1swW1qA5}epqVRyEmUh%T{iz4Na|!8$kXse%%rS}uR%0)NdB(tbN%{q@xw^p1 z*U1awd;&HkAr~teFC%b}Lvd{#0wtD4jG!X-^u#A2lo%be;3I28SFzBO#ehUeYv_Sk zrB1Y`*~XL8D{sCt_5czjaN$d3C8`;}FG%f`2c-#I?8v={e{}PZ2ZVdw1;fq1q(+_I zaH=TG-ybl`BAg@ zl=?xnin+%?CqN>qHm)AUhUml-liMl-Y!8t)uvJb5o+ekt{7$+p7SrC>zjCk*tT6o~ z62VtAEM?KG()03C2QZ;5uK9fyx*A*8cl8AFvh^37%WIQK$!W{&)KVLlZ_N$7vg-DT zC3W9&KiFK+`R3LQHHz@qc(nN{hMWW1_JU>tCe-KF;awf0?dP)Yp#FW+zoV%)TVXYS z$JX^rq;Pe0P9}LojClUq;FmV7o!$2P-+@sNGmB*PtPM6^toW_HO zDXF_y=?%zNaduY#0f+7%U{GZ} z)z@T&7yqDY0Aohv3D>G^y=@kw<$U33nVFf;)nU*bb`G=-o!zOZsc}rlJqGd^GOcfx z!a9S;aQ-Nxu3c{J5K@X59VMmABEupg=i@Jz*W))Cz98@#hhxV=#Fo@fkQLA@6P-VpP~u(THoD>)c?lx37ZfGf05mn@ zB)iK+>|6Z^fea@M956HUg2(|(#_meg&Qhqrw_g?W^veNmZk-gpqK~&XG;k4$!^!cT zc~?F~8>#sykARuc z-ae!B?+^{g8zK8{CvS1Zsq5l03#tai#T)!7jO_LF^t7@<1lD1rcUHAX9>&Xp#Od+l zh=q4pDnR=2L^Q-3OTPCC+dqf!B(NrYBy(4t8GNsEh2^95~>sV zad2_OxqYG&CLzzj8tblgH8pAMZo{_0{6?^G;=`ULjN7)A<4gp6H~x;Bk`1xVFYS%p zFlb|51a{gI&gT4aO!?Ka_yQhX^ILAskMGoL=VoOy0`X+BW@9;bI$2td0fT4azHlCw z#v}yrb&y2hf{tBBhsk=->zVRGEQB?TlAxLcI(6GdiBt8J&{9B&039p@QqDCenc-+q zGabt6a0u3I_>6V=dUN@a__t6h1n1<1&uZu&zdH=opV99Dp1MzSmrR)*Muc*4 zosHC^vneYp^Ud@7CeDTQCJ;dX2yvy{rojs{#(jG&U3BUXC`@3BfRV7No$IAMGQi1^ zdof443VD?OqPo9_{t%K0aY;#vO8EkJ<<_nfbeEu^LDP%BPRVruWiK#~Rr%g# zX*5KC{6XE5p?&+x8R{x)cxpR^ae-h zj{Yz@Ox8CvK)*fQ(AFl7 zko&*?ejhl&ZZ!6b55B32(LEsTV}c5>zd>XA0&ihTKfnkrKW1W8Wr?5o^}%IojOblR zHoM(+38yzwF{X{0--3w+4Z-XfFmdQu1rfAz8gB$PkEK-iq?I05H}t>~4eYK!V}fo> zmX`LH`DCEE!Go{Zx8EGK4z?+M7f&Afp%4IdQNcTv9wu z(#NRZ3x6kw@&yW1xT;bOZ9$Oq4(n9)tLsk}&%Y3YgedaHB2t}CdU!0(+A=4BQOxF@`EO4;0Up5qHhbYObe;HfpZQlRDgI7 zo=&+UO_Zt-z% z;8w=zSjD|YoM_xk8@Qm8!6lNKdKIV^K*-A69^R@w%3pIU)(_rf$6O}>41MofvFBhN zMPi@5VL0pqzp!?UZ}xp?E15gwNxq|qJGPyyadQ{$TXa@L@eM@<%B>m^%};&L4r+jo zv_nyapU^-0*G{7#j6X)r2*p6d8dEc~EF6B23qlYE%{!D~m=_Ffb9_8kUPVO;}b5tgt)kS48Ay}XVS1?wr4~Jw>SXe&3BYsrkg%mwp^w{D_NlBBFla1bx0`<2| z#p9dQ$CHW97HvXQISY46K4p^`XfqrX^$z8_m`~_vNS@yN?PQ7E^!kNGAHaM0IL>kb zbJ#!I-dvXP@W|(9n*_|g9U2m}qNkTfX 
z2dp=hPNEArg;NgxBKz+5E8UL^(r|C$&Jl*(H_*KnQ`GtS`GLtQpFiI{KAy}|hjN3; zt4lso7_@Juh6Ec?b-^hExTEaz=jsG}oO}1~ZRfr89y}&ecDJ&yT+q^j0x=W- zsMjtoR6u5dSOgDuh!Wc;_Uhk%|1BXgpPQ{e0PW}oChpUUicS6W8#c%}+`~Y0tSD$T zseke?#AQZSek))0dGO!F&nIeB;~(ahiPb9t3Lkw(S*NU1o)$|K@;cbSky%)2Bz4qq zFHTQqXEaDSNulc$I~Z;@D}hA94*B9U06vt*HAE;HN-zlD?EVapbM}aoJYn!wTb*N zqm{Uf)FAmnk7HN}^W@gen=eJL@@kd!dl1oD4ofNW3;Xjue2zJ!q)&_0!ArV zr?6wqfiFS+NI*ids#W+>(#WtxUFh!a4o(L-n(ruN7rt*f2K^qi>rjKA-fhsoF!Aj9 zb9ld*nVExwkD*!>tQSs`xaWVLT5PZX^xjmPNp4*66}jH;-`Pd1{|2m#UK6JTZVDU2 zkli>(NWK7GFIF@y6L%~j8Q^s~apIO{cN;V_zB5YvXU?4?0a5&9MXjjhC1Ob6WGFRS z(PI{uv*tB5G?-Wqmzzal4l}p`QXI#|@^-OcYBln<3i#)8&P{NcI*#-WUmXj5F&Lyw zW;$-?AHMKVcyExNZUwizuu3nR@b{J5x}hRy$gaPHg+dM#4Av|cXU0XLj+O}l*Elkf zzTVt?fqH+guUNRDXddQ)b`-9LQ`B;CYRh=k8Y*B9Xr7^%dE>ozw;}%@zsNMgYCP% ztE2l^lZI!H$`s!pM=`D+bfD#d?u`(dz;1XIYWDa@kOi3A=Ce~;3e(4+yQVw(>w3ZS zq0{b#EnoQR6-dU|n#fk{tZ{@s+S#S^(=Wdk3G!Nibg<5vHizJYUY>`u0otxbXdpCp(>fv`kdEt&T<-&|ErpIcK;K>xcms_hE&eCgEbiglygb2H zJl{DmPVKhB7OhXAkxraZgUBx^L$7H4D9@q9(O7HJhMW-Bw9Rm3=DzSkGZmRi*s;d` zI73d(?!f~+c~Kb}q?Uv=O>`=z8`pQ0tG&ZRh5i+Lrd1jk19qfV3cjzl%4cB6N(`eUd^>T){j+A8p}WIz)A^ylyle1W@cyv%<6p;RpxXdA z-!D+`Y`JQPA>wR}^XDHGk75r++AN~F0K#IPZF z(GjcSht?Y|R0OuCKmwK%F0irh zv_|t&=?A7R)*Qj&Zs3rbo5?^M045Fz>rP+``|RNzg$N4U6D1kkdL*vpt#yQ9DQ;dU z>_Y{8mDJTUQR5@r%M*q^Bx?i+nmx}XwM=fV#HgAe$7m^9Asm?@RJ(gp*Crj_+;7b^ zJu5HPx97yEQ)bRa2QOaP8fO?hBsFxx;5OM0Ay+mYx6Vf8hKhn8VSYfpyFd>hOn?PxhCBRz3?IBvvB553c#HQ@RSwzyy2xpSu#b49JTk=K1gQ8SKD?bjl2WSRu9 zy}&cc28xnthXC7zZmr238`iB;(kJN^sGfL4O~9t)p_e8G{nu{KW^c^iWqON1WB_ZO zuvY%NA`M|AXPUrM8tS8IX;cI}JV|&rtkN-2QIMUR7v9~1QygdgB|tYQpP4Z@41I;O z%*ER~ooW>x?XL-|;U1MWsHUXcxLM{v1|++)qJ? 
zCBj3veMtIjA78!1nq`aV5s0;@T!c0TC(i2I`XsI0`Tbm3@{`*k|9bt+GbCm=P7GXW zfkmng(d4JS*M!aW{kvXV`M|DlMdINyF*)e&y-AkH`<}=D@U(BX-P=WeWyK1)|GmHT zacSbJO{bHXV-%N@Yg@w>)}~aol!MLtXqMX3(Jz`izG@$z2x>0sYYsglWb>~qZik}b zB*(gL`IfEV15{G4-LTl|+LIL~|H1n}TYI1Uh`Q3}u^*vl8m-q_I^5uI*z{1}ai8WV z_WySQN;+Otjt*Y6I)wt3_B<+di0L{ZzsC6E2B-;_X2O?dz@#Gi!&b`f4*wTmB*~zw zz9F3=9ah$7%!hReChMDQGeM;u1ON-Hm1w77vW^dx22X~6Q7;1uWlwa*xA)Hv;+hK) zQL=(>(hgb06xUHm5^>QPZl`Ag{YS_#FlT`F2b)uRNQ@UkB~zoK3ZZ>b9mGX%m5u%V zw~&wnZa|3p>C}B4>QGK`adCK5!LSkU7G28l$9*tvL%uwX@kp=rLw%^Anr#rx zBH;Z4`Uf+%P{@L^-*po9i)bjlHnD4IX!K8%h}n7&&l%y%7@?p!PU+paQz{pc6Tg|H z*kEZ4XoBfUdDQ@5I}ood3=BZH2f4q#+vaL&8t-F8W@!{3249xpRpm`t=;&$71z)iQ zHt&3}#tiYa0dGaAkD3tz_-LslUQcA~gQ+;hoJG_qD+~Us8s$8U0wh^3EzXOMql+Bm zcDxPKyu!j+uz}!<_p>{;M?^k=#CY&lL=#D9!sBU6(ZBPBOUJI}B&%e0pX(7imTi6U zY6%=3)0Zid387$3JsYpw+LV~>V8`@R2fWNhw}~RK>oU&-&t>-onepWPQfAvKlWnX% zg5pHogp21K`6@%QpZ90&9jg72!nF!xxw0k9`EF zZyz$=k&`ufk8VLyNNBPMLkkdcmGl!~9CjTr8#%G9kppZc77g9HCwjiB`Yy1)zc9;d z;I7f9zgxPxmXIPdG&0nOlf;|2{n89x#IiKt<;~`F5@9UCf7f^m8CiL-V^JdE`i~*PqvC=MY;)Dl#aT2ScRL zW8%dt8aZdItr{C&e?A!tQLF@F2_lLGd;2WjU2d`THLoYeY2w;H*W-`lHKAfSyE4$d z_4LM;%1{<%YW74io4t?);P(>8*Vvm#tXP1KzrIKX*E&fQLV&)j*cXvBN&hn&2_H!m zvRF1>s-%~4z37D6B_lTMyA6J0mJIpHb{RvvzcAH>kb$U!Tx@H+AuY+idSpqyVNgYq z62pKe?oH94Cu34+7J=p|9DXRhtgk`p7P=ktkXmS_=-p;Lir9#*#mOv$%ix>grX^wPMBHFIOCTlS z=R@39XY@|Tq(S^32majtm^fKP`Q!f@>Oo^LG!8ycy+k8V8l*dGjYygz+r3?Ff(jc4 zB}f_rS_o&iCSm|As;g6!e$9Uth=g5DVzULYUPRzs$%78SJA=y&I+Wc6w5Fj`1F(v*?P@?gSY77G#PteH}40T!yXKLUYxuJ1lb3x*g zIm`Z_!fIw7v_siC9!Mx8Lak3t2jQ?#Xnt!GIh(U)9SZ(@tQ$e|H}F6(GSZ;IAe9Ff zJqvF)q=m$10mYdgiwpGeXy+0ZlZFF{^Z}*H8_4ZuW}rYxNb|&B?oCl5=M#oj_^~LD zASNWS(+9(kEr{20eS43-@X|bSdq^$K+h!~QSEE!TET+X;-fXK3S)4c{?5;Pxl4GV# zA+|$qY{wZnIqkRBU!iRTZw|!`W_mewx?kOnc^#&#o49 zcwbmWn;lB3v0e|`mQ=%qbg7hGrthN?^JbD9`b*uvjEtbdJ+SCnIx@&y@v<3{zu4)X zm$S>jE#(XS-@mYS&>ZmVpq3RpF^>ztVvOY~k?Q=-$jqD!yA@H=VT%olBuAw3L`zEX zMJXVNDe^-tSrr6*sV(h@7xyBeeC0ucW$k=X8;l{lo8dy%RJ2%5v79%hWQK@4k~h;8!$f06?;bP$eup=2H%Y&*Cd;s{9Uc^5<+SGjI8S1H|Kl;syB 
z|MjnrPYSDnpyIvGVerO4NYd_k%qqBY=3f`;bM1X#)sO#@{UVh2@(t6jC(tHl&CcO1 zz19D#B6$LHZJ_PPg+if}^(b-z876uXA`KZCF}IJwDw^Vyp1DWI?6Z&W7m;8=)dF}y zNmHXL`xc0-t3-Ahz*SDB&_FN)*7Q8Obu#`5nk+UBsY8H~uqSfx$J82c_w<(fAs+fH z`;x_5*W)RqG$8+JX_>Ufn>-7^M#eHnRGEF&t3;jKqyPXB;C21wP#%JmQi5~zk}=+x$}iEn7P3vuBZtH z*={3<1~2J48ew*qOwC1rt}`GtxG$DRGehK)`Sp+E*N4@Nw(u34%>+}hWw&he4+fHJyG*uvfT(RB0*9lsvJG-^Y$xZ8iDfwRl{sj@OU<|JmX7)@~XHSB5f<54;h5gt68$L*v;u>Xj5q zX7K9ev%Xpz;g1so;TQPvQWPA>T)e#{&g^xV3vwNYZ`nLo3x-OCAg=7p2+lU3hqGkM z;@jdzi<2xktA76ci&dfT4q{VaAwuL3vA*4_@KOAQRYfqfD3oN~hp!l~cBhbe0Nv6E zJj1HO#PTMAX-3DJzzq~=(-I5_nSrr8uWV-pn{*-eglKf3($B1_#bK4%RVeWq@ zZ+<`9CRe>^v1@YSN^UfF-)lP7wPt!*ADR|Rn#)t~Fl4%4zt8aQN{Tq2=V0@J$v0dM z8sCL2sFiGrO(M<3S2hIxu+X;{8GApvu1lxsp18w?z~*4~|0PY;@UjtixxKr@41*+< zw&=OXYx%$5FyOvs{J!ILT#{Yq@~^B2e#Da6!N|awsG0Z0c^UCsVSXG!ZShR2!%)uu zm){kdoP5|fd>LhWudv6q zvx!-2G=ERjtLSL4;T-eM+?K%KAN(4_?Bu-&D63K3iCQgoExDqILJaCiuBa%aU5#3m z!Anz1=^4;Zid!kP!uuNXL=H4Av7Y0f_v=C%HA+2j!iewnSIN^h!ac1pC#dZyc$>5Bajirfz1UK}H~+kSJ^Z#IP!1|r zo$>hYdwYinmo)XRl##zGCLgxLP6B|E?O7fO3$Fh2X+_rIPk3X<#FIwRz&n2mI=`l^hqY z>20a)b2X4ZB96=1=4?rg!Ag(Vd`*F69xV8Vk;;R8%8qZwZW!_J%F*{n#8Mt?oO?lBV>>%srN9nNcU7A%WK zBi5dLQM&)(aLO?v>g~xDB?;TT9^U7B_L7-i!i#aK44!X_<5E$}O_3b_5Mhy%Xv!cX zu7IN+G@vC~3}~-20PF*Pt~95|bV}k|TKr1y`MI(ZmU)e~xf)*&)!%ssok>gK1S=u- z;xpZttrY`&7;()GC|jX+T5k*Ln*3z!W_VTmA>uWFEXj06?6cSg=cM0R2c5u}xlm|! 
zIrTgwHO!{8G5&;@+EUc7Xc1AhxIlaYZpj0dB83265aNXZdeD~~D5uw{?*ltX<_d#r zZES1|dzau%y*29!$pZ+PhAw+q3?r>ZT^YH7QGRM#=rAZ(%n>aIX`T2J62s-8wH2=O zffT^=ss;3tzp6>e6dVkPRg$QEA^ah>=t=0|Fb3$Wq-y?ZrqOU>oz zA)!{87Sf(}&G|1sTc@3UdkrhL@OolZS;>|HpvqRm=K)t+Pmom_o|;B%d2 zgEDA30$m`+3aSGHIlNjVKz0c~MS=A; z`yWVuVwX^Be0QQpmU6Ut^V|KCuu(%#nPE$tLgPR9vF6EQ60dzCLx%Ii?RG=;9B(3E z+>dYj~~^0n;cE&O-F&nsmVcGFHi?$%J~lyr1(G;qBagAj@|Y&&m|8^lZo`eg%~rf zB1;)hjbII7MMcHK+}x@==@Fq5oV8~zxfz+kP>0%EL^>qx&P}q|mc!px7w3)Cf?y5N z#*-OVr5O)D)*sWgd$d#B!JN9!YccqP36s1>x(5U&h+0#o+CR6h`P?2@Q~g3MB#sBo zxsBnD9%_}eBhytr${$?{Abb3*{`k06?5)V&xy|C{GQnA!Lp!u!e5&bseKI&p9;}?N zucEQ>Km*WA5Qs*nyLJSPen`L%$v^YFWYlaYufZPZ_4oQ07w9w=0TIraiQz zP%2fpsja&2&+fmSfwRSLk$aBK1BGUX>K{<%ZPLBOEKsvAX+`SGuf-7 zN^d4X{@2l~zB}YeaTSXNOi3u=a6K z|41`6_Ze)F4LL!1tQ8Iu6vT7UGOPy(hYR+%uwnDbJ7L|xLhP^T#_F)cXB@Q`!g-GLqc zP|Mnev|f9u?%9;E7nuW+P=()dVJzqW^N_Q3lpeB2)jeUb(`EmP71JEr5~fh0y;h9U z7Mr-C74VsBSa>WxW?4mrny}TR(MG|3M&+JxHb4Dvj@K&oSev23Nk*##XP7Sdtqktql`v3Qqx+l~f*;`hNR9B(#w+69sj&7b!VWxG@l!*You z!Rbf$v1i0K1w5tY$NfWM7oYq4j|2IKlcMp;34(fJ4m@O1vEPSXK5Blp9enx@@?cvH zLn2H-pa6gZc4-TdTc*|`&>?|g9Nc<#VFAZ5cW4XmTZ9Bt zFNwWqhr?DZe(2jEK(Gb8k+JeO@5x9NxS#Og6ux6zhR|3hhZV+s5b6MhsNDoXis*oj zz*>Yrn~NY3xJZ81DR_#f@~0%(FIh~-=v}(hja-*_TO&jlfg8u*XdB#6T1U>~!g2wj zN;JxVH4%$v(p?Wgk)q}_f2p+n2o)5-K+v&OYckx}`?0HEy6c?@Ch&YDL-rTwtv$Q1 zuLZw(#jvHDOu8ptJYp?I?5e;eYeT1O=;#1QDBZ~c!o}V%s8|2_Lty`4eX{)vV9wu( zu?vs_p&SCQ+yt6Gj#?zk#|mEKd#y`IH?r;L#R8xsZiTmQL-(|2u;2YCbJB^$z|uzQ8) z(M&baw&5j_iA)%}dt1n$&usEbRQSmkBcZF^S4Ld*w&*KOAQcO&Y& zMD2$3K6{#r8Ehr11D$**>bQHf(J zY1g@6jS-jO@;BFDCJJ`JwZhNKP@z*5{itJVZD2E)M%I1rs>b7O_-oe0yU-jRgPXKf zGOypT53g=mk(iyi1x9$tMux`wtc_OL|Jb)cqGeVOOH~F7jhFv0j`o(%Yh%g@-ZZ{% z(y%FHxr-{6)&k^GW#ma4ta(^q?2voH15zxsOnw)I&3;B2g3wb~N2+>mLq(y@N2#Wj zg^vVgYG#X_Ll9Jm9;)ZNT~ND2eQ#3*YnQDZEP7NK+V5VZON*b=D@ej3+lIpk$eThb zO)~ZO@BW3^EzFFJqER(ZvDVef#eYF{_sRK1E+$NGNm_f3B3VB$b@X z)xBQj$-#p8m=gK#HblN6oJDMMu*Sv0Ev*V@YH00*;7k(o65A98!*>7vh;EU6iuj0D ze&}R{@>^n{6HyqP4&p}aFi8{^xB8DhsLxt(XHko3 
zdyh=*j=lXKKib`B4_|%nso{#O(g$Ry8E|KGdGe{!4~0L{H~fZGLh|{auskWg7g4OR zXc*35;Q1dvs?ISQKZ=9D*Wm~~%Ml;{g|Bypes_jJMS*8GvX37-&PlztD=WET<;1^V zXT`U3d%fV&k3JXpEqA7t_#d8-KfEB-3M(EN`SuTgy5g^)x5;MONi{Y=b|}A zvOph0s2Fk#eZ&=;Q9`2h;TpreX7&x^$_k)z1j=cYVJp8g2a%z|!ouLbQ5{?{D$YVM zLv-UyI#ZQ?BZU*;XTzFfO8DvCUfm&Lv5E#m54`F!_ z_W#SvpP($Cqg+67N5U&^`&lF73+6UsN}3>!Ss$S!d5CGcC1zMdrMy~yAng@(gjs7^ zJhz~rJ*YcxjoVNANQ8CePxna41YlQ}K%E@^^!@wos?78pmp@f^Zx zD+yk5&F>Vn%Ia6HyeiIm5*8LVs*K4~9-Zs11`SazuEac?3TT9(*OZnP5ow0CsckOG zMH910K!C%+5p~G82-_HL2RkgAWqSkI6oe)yX=}T}sS3>vo}4HY>Y6j$UrI|`kW7GV zRCP$Kk!lZHN?b1XHy-Pt+V@vHs7hNA5$tdyt#v|8RTboc+(AfW4%fVW9K06rp0X=#3|Phpw@%bM##G3kHD7Zep>CN)*OoGrq|j?h;_J8m3W)c z|9QO->>L+VR2bOU*mmyhg2TMQ9@}Iwr1j>cmqUiuQ?0vc`^ZZd*ol{Z6B=WZ469@Z z=Qe^_lFQEQJDV>9Z9B%tr>3O|<~H$qAoCRfP#te)NlA(3w>`#L({poc*RF+n7gk6U z3<`j(AqVz<*lCHs>|Bt538-bLnKrIpUu&2LpNB@4dCRv%w@knt1LHhM$U^lKuhL+M zsjdhGmr4sh9vc_eg17Zg(fQ5JOY1XM6NpSIo*G<~7@T2jwuQ$q4QlDuK$1#pHfZ9h zI4Xeu9^^OfAHI3u2fAQ%f*{@yxe^E3CO#Cm{_SKjh<9t(Hgbr3Flv<2$+jB=ua71= zT@N>#EM$75oWa--e~LrPYG$2kJh6p!8Qzex0dyc9zhL|SOz1T_3wsIHgn$qaH}@}~ zSf6ulJ%E~+h}i&u1;P>n`VRV6JV8alrB@3Kfc&8ILs908p+lf&;D~U?$XZNIL==vW zjt-nMBnh3^k(upmggYH_L)s zv6R<}TbY;|F(S!5m&`#VB{>cYLd56%!3_Yh*!|3jW2)d<%;?(zuec;sCGLeGuAY}X zWOK{YN;@zQMEq1kKkR88u@p}9Vq6JH;J^K#W}xulJiu=>u`*+{4StfgTr?Q(gP1Pb zxIG69DV;msfk3+jB+L6;3|wNPqh&8+m?-K1Yzb(?gWDhS&-Eg(95af{fcxN}#>w8G z04b5Z)d{v=R4T^XjDxv``VRQZFO9Qj&!T!%l-BA*y)%l0E(>< z3ydTok9TQptgLiUU*wY4*o`oMIFr?=)!A60b2U&XPD=@x(ulAZN)>|zt-Yjp(DsDSGj5#>WAptR6M*s?Y@nEt~uaRVE zqe3iNh=J3Ic$N&7Njm0XU=YP$)q^?9bXT|I$b=pSKMapQ^27XrSLV>ZgA3k{)Z|?0 zO$)D4-S{~X0}>aPSlK%v_s>L#iF<4BUa-SFE(yNdpr6lQi^oq?We|T3$ixq2%&)UF z6YJK~Q`%6|N#y@$ZWt#Cu|UCp&s2J!d?uhK+-`yPRZ_QK_(sL56?tpnmeRO==MEAN z6lt-oORxs|U*GxNtiAp^tr*5sH_V7P6L{(cKd;Sy0g>*e?Y$qd)f`0_x6>IZn!EyfbLIJ;2QGcht^2+1!ve89Cl@UGH%Stx{q5I`L*NgHt( z$IBfP%M+<*@i^haUOyX)Yf%2Dlqo%qxV~NDv-7BnOI!_njqMM0f7#%SDhc~Atg7M+ 
z38mlfM!jA!ULj7+hb}c3S@w`&H)Et|gPD+3ryJ&RrV4FA_orKOU@HOWKaA?WEkzV-audjNwU^`CSKpNnm?el;G9+n0sICKj;tMZiEsuP6r?ebA#0PB_q@!Gq~A9Jg^VCW{yu!TRLt?2K)T zk#IuqUY{v5wnRPBhXQ`ElkHnvO4R#Ji8c(Q9?LO{ia=~Psh zE9&FwpeHPtWRA8LL&cSjp96Fk+Z76Bcs%fQM5vYgIMNVm%R5B_PFY-B95iSdbY<+m zQcD@l?4taUQ}XazJ+J5LlZvFOpm;+9JN6RZ6P8dtGdxr#9+^uQw1~%#C^3&fnn+v_ z6VM=nBhdOCO#oA$Iu7@zPX$bBqi`fA)P@R@X6VHXnPhg8w{DQe6K@zErh0n{Ww{qb&wYG0K(Ps>YYtn_=&wSPa%`Upp_KK0PY$H7n-R#USh zr+z+k+&J?#pzwSTW7R?QD?_V?HsDgnb_0Sl)>W{S(zV;{+}Hg)jOoesW~wnR4Gtvr zTfbxzRp8b8_eYhFbXp&HgLiR?ss0`cC ztWar`N^_Yrm7$5E5YdbVn$PRg-uF5G^F05xo@cFd)_I<@*16Zc*1c|ftKaYY{S4Rj zzTU%nb{?{VRstBP67_so_<%6Wsconf!xIXf3vGMv#zqFRNI9+w9{Xo@hSK_`*Nyvo z?EF<$^bmZ-Fsf(;Y8%p+W~B9+5oSPryFh)*_c3N)+z|mXN`%*EbY_5H8*`ARH^wM_ zT95u#L=SWpx|ie_ED39CPsepSrRxrBmh~GIdm`T|^?j2NjtQdLh}AkE$0G?FVTt+e z&0IYh{un{nB0SZ`#)f))DvW8Q;PJL42gq|_EGr0mR2EkLFRnN}&CYIT^oT(RS<)MC zi09oU>tpJ&zKYpBPYr9{j;Ltzq)7sfK7O1iA9mc;%$F!z=rYmC#HYGuas08sUf8E? z-Mm>Bb3KiBYtdyuW(1gvZ4?@5+6sOh#iM*(+od|UFrjM=%i#_{Qwr9JTrA-2aow8r zffHv&nm*bPeyc&!*y#Rdsi)zuS8HfwF|VNw2$^?r*Xq^Ja6T3}mXiCPN8h-i-%`Ps zu9!nv0YbwGV6eJyf{{G>S-M{=R?nL_mGdu9r0BI6t#MhHQt+W?=W9enhbUmgjD{>H~Mihu3GNhbBGFokA zK3cq_PF1~;5YGhXU7R;%iVd%(C0z)fA-D23<5GJx%JjG1XB})E5&kaj)gYN1e?wjr zwIXg03@B*R=CAdYUo(d)J8k0U%~WMr(;r5)aTRv@#gt+gGfF5!M4uxiiij77nX z!JL!6Ip$P|pFpE&t<2WU#%jW??e0@I{mDEOHjY(O*ehOGz}xm{h7NohT==w$P;Tm# z;7Io6*cl7);diV+(ivy5xrl~>u#JS->6FupmoD}BntJ~Q-X=!rk;m`9*wC`s^l>%# zHrzDiD6~-uN^j}FCUHJ`?N^;m?*T85P>r>usD-Uswk#1hZmJO?C*FPhhz>6J*(#6H z50dK5k`vnfqGOkg+{4%vjyEpg}TKv zh^L$BSS*&yd^?2dIpmVfWG~bGn!FFX9dqAvB!25IX9{rpsfOvyIjh))-FtOT$kV2# z;UDcRI3h$hoM5|w!-b_wCMcN|Bk;Uvf1lT6#zJf6D_!M5bD)NOr1Rp`_kTNB`VOz2 z+4+anyp6bI8CNfd2v|IX_e>ezEN`S8KBq4G6jeXc@z7wrOOWIc|DJmp6_S*+^ykSt z_qpZnKkxf<)BXKdPQFQ+))pyQHBmhEAA`kHr|sAcqph@8yq&_KA~fCDU9ohnBC+7K zTok!8`qd!#m7-=uq|D;iPa3f{2bmaDKE&yd@I}EG=TiTxp2-wAe5gE2_IHCe+oE#v z&V{sjltR=n4O40KYSurDp$o68tJ5&W^YvdXfd?|H{3Cu12vmZ-Yy8Zlp#L7g$WJTti%lY%eQtABHQbFml5+7-0I3M 
zBxGLNe;tJx$j{}FkV+dGPe#W~+kgqI2P?Z^62V>Lg1wTH-fR^W6^!3T#?vw3;BmL8 zD22+H&0ILcN}8H%!k!*`LDitjF7M0y%i`XdR{`dKcyDKJ4jMt$J>jlZ2B_h zrG3N|#D?RF?Y)7eF;iQirYJ$HWzl1*Mk}d;uut=`e^NbNu`K4e>gmQiI7aG*G@qPs z4d_)&G1l$We!xQ%&QYLKyWet;3l;E^p#9`+9H`bhvdw?imJ#YvYBEEIdJ#&0^5olX zopiFl0qB~!fA~K69+qzBeR=N8Z~bAuGBxSfPjyytw1zw5DGR#EQIP{VxY$f~16e_F zt*?@f1;cqkjytkAH@G(qzQ1Z`NLK0V+xEkm^^-}z0EWWD+C<89HbxR7OSd$r);vaB z(wo?MN=cy?-NRQS*XYiY5_re;Ei27G#%gMZH^}XfqLu*R#bYPFXQTh&@mJ^DjGZ-0 zwD7drO73HEL!*8#G(b?|i!TM74pn$s^d6SUg`R^oX-G9S%YIT9r;ImDQ(p-VWSJbH z$p@LCJC!k-Gh^-_za~?N366f6%oyXTJG5_kRTwR~(-NJv9sPg3jDJ-+lqZ4~kz)v0M2qNt^CZjJXmX34|e zwd;E9>xa+tsb2GV#<6vhWt`_|to&e=vL%9mQ;d`SyG?mTMqs4#wuR5gUc|lSAM;Ki z?09n=EFQ1teZ-l?Hv>>{O-4`g^_%87VeZ_0Q0E1s3JvUzw&gjujTYP7dG*L(l8M7n zOi>p-UXLbotQ0~lurjFp+Y~95~3{b;+smpDjbd##tn$MWs2ZtFF3TR z)~>$w?F_<)C$wJ1jkqPrKP>0)?o+XMuM%4)j5Fu7tZ~b9wcTaiYZ_Rz$AqCw!3CWgX%YRd&9hp8d4M|9FnWiQqxq0nC1M=1yIKESpm{AM>-fT1(YRU8 znsYaH(yH!7Kh~PBtEy?=UYHu}WAHS1V*~jojGMy^Zyxem_9iC$;v{{`HMRPduS@)v z`)9wYnsVaJ>L263%((S9ZQavA!CBCJc;(?bU+w>%FbExu!1fq={r^T7WWX;3jM-UP zHb7NqWf~BPTuV)TjleWDt;46!0jA36(ys4s*?m@o1R*pCO-y{LSaJq$Ah{>(HK9bK zY9%o1kKWvvI*-*Z^?X(L`}y-26Fg(2Bm|kCBZJbR!I2?yu{5K_?a`r0jiMEbOen?+ z#K0v3R zu4P-}Fzn%B3&NdLmpzBO*W!+1B*q}gP54qK{aYw0WW4+&Y91k;a%-atMEc9bvcN$T ztD#mRkNVt;s@(UKRGEXpIbI&NR(0OH5b28<1mFf}@~p>urZwzzf1OuyezIYZhZ?m% z6^n|hYVm*2Os*+-v1fVL37)k^wDw#t`zo%6K+J$K;~?A!`Go`)`5+xx_vth@PiSjE zN2@uVKx5z2^esQo#b7^4t=$g8M@ZgRuU`G;e239@T@_EOC)WVTgrv}^h&IZ7;BQEJ zc}NFNZZqw`FIEVMK&u+Q3t!|v`b@=>R8TUl>STuPdw2TODQd&7$V}qNkd`Pe?YYt| z?QfTuidvbm!136|*+TWpwMQ&V#m+Un7pPw^|I1obq57|ZMgH~}5qQ=iZY*AaewXy4) zA)zqy%qHxxpqk24}U=nIQl^nxwde?cFxW;{gV2{8b@9=uO%guO-Yw8Y^x+~4} z)izWwAQ_RN_8b2>uDK-`g5tN-WM)d+?nBu zEZtA|a%Xjtx;x6XT*6APN_1U(M*79AdhxN69l{@lH8b>IJ7ywbQCoYs0rWbfo)uhf z=wcGm(6TO`vt?I&B2aDXD*w5LAHimPCSM~dija^mGhkXnRKqVgETaSQVvgcri46g? 
ze>w5jqh!PyH(r3FT=;7tKlE3){L>u~EG#(NJb0}G-_8hEIZ5^{q;1|@u6qA@m&MZG zQ()|9%(|J?caVnZvh$~K_YM?EM$zfbVW%yT?XeEC8O-s9S(MU;2Gd^g8 z36-^wfi{MPMWLQ1-pHdpb|Z65)@{3OIA7PCRpP zzT}&v9>37M_Gzd(y+sSgK#=Ihi4eMuEZYNS{|G{Jc91{!$^?s6S~Aj^F8{;K_K5zM zZ(~<-rDAtZ_$OCtyRnM(%ePPJEFdQ8U1J^5W}wlFc6d_-4Gh6^jPNZbK%Kh zdSL%w1EsWO>$mNBoBZ!Qsae8N`|m(0>f~)4XliO3z6y~ucr-m+?&8=UZs*3j&YZjF z{jmu(865}CGJ|Nrjo!5N$JedrB+wlGBq5z!lHyPq{QDV?#_YaY=V8%jv;RG<<048^ zr&eRM<+s;g#Ac9S79R0?P+=?A6=_th^&x74X~UMljM|WG z1ANH&I+uFXHD!!c3#dK?#5cfetBHutDLf!9wE!+`%!sibg>YyVo+o~EZ_W5mIS0w`7HoKcqhBazDsCoIO z<<>xr<`n!v!qS)f*rv+QEsBZpxjyc-ROBDymd;i>Qq`}7fB(Ks@o6W=qvcWdMKY<0 zX12Tk7SO$&Q~N|h=BGX64tAD||D!3K{E+`?iJt#2e%9tWjDr5*-%VCWdh#{~B2e^eGK74I?Q zYVuX;{3SQXi{GiEbLY4GPWs)(3IBV(v;WDtuRNRol`HN)`}9X?ure}SD3t4pb8udi z+bo6Yn%*qQDS>;WP=e`{f*QW}Fpj3F*VjvXuh@O$$N;VdK*j0-o*~nu68mir&ZyKo z8k3pX- zQn)dFBd{emo&nc!uD?K-44}CL7FOd)t%tJ>?akJGf}0Q41GP@8Q**^U4PhZ0(hi!1e&}9ORpwBvS{4-^`H^I$aXwEs&msT z`1x%ZI_6I0#hu5^*isu|BfH|Bm707wo<{g~;gHnPHP>=vhD{gJVNLs4b%ss zv>y_qa$6XpxxG2P{+jDHl}t&1pMpS3d0{ z!0%;cZBR44ttez}1(=%b_QK)vl0Jo9J7nbySCg>T)&~BgatTDTlGLK;augI$14BYW zM#2pH$jk&hEIgyg&O_%44iBM3f|NwAQ_4#j*Jey=Rn#3nYF^MsgJ?QsA3Qc34ipr@ z9*N&8ar*DXDrRE{q-Ov6>%vTxd)vuD($QMEl5>4^*I$ILsz5587}-TJ2D-YRlc$au zqwcYrjkp2Z#K6rIm@p zGW4{E-JN=OyXSp#71M;?ZN?UL`hi(}Y>G$R-98{?9y}x58M7CeKKQin!R^ULaStXX z2?r+4Ugjon0-@iMXJ3)}w<7h^Q04}>!(vp45d)o=DuisWorQaT%-lGs=v$@xzy5*fgG(rr2h29B<#pTalauyZ;4q^Jl6_pc!9bAKmOE^yE`mOJGaHPSrH&9H9pz*Y%beR5xOC&Ckrs8^RF3?GS?QH4S0h66US-t{*>>?I zW2U?+2RS_#PYj5w&g~mwJQ~^>HC0y2j~|evG^dc__Qf?Uky5M8_-l{u-OG8kwhtXH zR~Xr-LPP!tA0sq$V*K8f6~==S3sYq>jGQ4CZQHXeDAXq0M_vf^?nUJn->_j9ZV)~S zQa+U`XW1_1M79<0G4CV7o=?L=%rF%n@lVQXZmMn~7ADl5RbMlOdQc>xZpjqFEg zgA3XSt33Z&QkuDP{O#z#E~nq)!)-7u@pI7eILSdN!c;|=pSgSv zJZf*1)7^wQ1u5&LpFdmB_PWiZw5IQOL00XsPq4E)2=0cx^YY{`gB5N4N=5TOBrf6g zu9ZwQBpRZSItkq1sn-m_7!2?jkJ>C-9uc)?56$ekp22;BMFJOHrp)*@(v{Nj{_-6< zcn{=h#bw3UHXVAzgi(gn0k4{`Bn;30YOdDe)#D+jdx73xQ~m8vI&>ye+4t7o#Dqhb 
zXEhXTu+=hMHe^8Zo)=P^DV&Karn7{2f!cAU&ALqjo~13X02ko+E+Yexo6?!DCf15x z=16R_JnCfRtONG6jxszO9{60S)H>R}cj6{4UypA%F;cDgN&Ky@yaXO+Xoe%vayUTc zJr^OMyX@@pUSX{E)z^2p>?a4yA;X4M61yi{XN}v8KMMa+mLqi#4e9~hGgRxI%uq>p zA2H9UTCj&_y&h*iP+hqFKz9Spgf$9yI|Ed(#zVJC%Oel2ZkcElA{SKa*6-lXFw)%(WuQhCH?+g&JVTZ^^HH|w<_-Y$%aVBqT5Q{F?P-?o%G~>!SZv9s`C?zF_i>LyXrX1s$^01KOt0S+H3xr{5EPo z($%j~U$$wOCfvv(#ma5r?xWF8j=_b2%Y7V!oo3<{oi$QTgJqtSMTcNB5$&H-~=gS|hjJO~`XEiu!7&QwHBGgy`4c|$^HTqOQKy>qI zPy;S-G&e!Prz?XN3IUOncs$u-1$9y8E=;b;`0cWzV>Lw%RYNjtN35pJCH=3!_&}{f zSqUPrwJsmUhhvAiA~SkF z#ixFJ3NXBKNye;t0s7H$Ecy>K)z;yId}Fel?~|xGK9CD?bCFkYFb-5S^MYScv68(K zo}5$3f!XlRPb&S`&tPJ2-K6Am9Yq(=G*!+*r&*E(_gr(wU@DgBCJ&0*#xy^Y~r#C3&4 z5_IVj>3x*{g)}ZApV#8~LdmRIuuf4`rxIhu{C%+)XW9!UM4#==eIe4#gRDPZ&LmAR zZXiGjb55ov2vc70x^>b%E_E?wA@EPu!B2#Wy8HNy(z&aVe)SLjf;%?S5_svp!Bs(p z+V>cFZ8bOl%_wf!$`OPsbSwEqe$u2@v=0pMnVCdQi{H)-?&JCy>J%{>-I;C zjI|W&c6s+3sT!eXHC>f*qAXbYOmE#h`EDA|Gv$B#tpu$VPsIDo88iL4%VBU*d4Fqc zRLx6s9DKoZbJ+6HEW!N{jD=MoZS0i-HZ{2yGdfd276hn4_2YYHwrF}ao=zGKOur&U zVd~WFcs>IEzNwxX=IAi8p`T4iP|$nsdY(spZmqGPa408;=V5uX>eHtJ^hf-+A8xic zcU~|YaYkB>BxN|lY+;f7f`2xApfjz_Yt7&h7YY&bLx%adC5v2InjhQ*s()4R5_A9@ z(nEOhFgyzLAiBYxGM}w*`Pq}wI93nU-gM^L*yyf_7Eg{KQ3a^SJkn8+8~czbbR{GN za+#`UniP##U=}-!r1jb&xKCBB(n^Q%#S|rD)JwiWzjp;v-Q{Fm!9E>{aET{EqET8}MzAd>1f1J+o=xFHIU)n7J>}o5D9OlZJS21QoWTzc#o3LT z0YDze&b%TZllxbpHlOverXOsnic$O5BVI%Fw+>Aad_xYm!D1U7EJLG{6+vYSmr335 zQ(AZ0x2ZdZmO)!M2{EZ#j~@PO9^Id7v@ANirBq~6oj6fXP}}{FlS`(K<>(MKZgk>% zH-KvLeD@DDzkj~|EA+e^h9#uX?Wfee4UAMe&ozk&CG!K$&E*;zd8T6Nb=|ty{8-&` zX4?=fp=@xnep;pCs+#m_Mn!kIPi1AWrBv$H>uxCqyAgW(_4Fpef2G;mwe+LfGd!g_txt{A^B8Zi{Kj^qVZU}_JY@e-4L=IIFjKS z)i|&72w_Da5_xtsR}S*zEY&vX_%z#>B1SDqpzTP#XJ6u}N7dtOJ*ln4`?_GK=|0VEy@WkG`3vMKz(=&Ke zTM<xT>}rFEOu^7`<{Klc5rv?gO5PN9k*e=2JtLj3Ltchxs8zRNw~obxZ@=*wnrDv~b)|GQ9el*mQu+&F4bS`! 
z0x8p!p))O3)1$vvk@NZ%Yv=2`QA0z6WALmr2O6NB)tO2FuLPdCad{@Jwm0 z{?vz%W9hLU1@iVF&TS(NJ|yJRRpo;rN3AkQO>HjsZT=%Vx>kGtf7(qD3DeUrmXfeobUCn@%*er43DPL71;VsysG$7g&LoXI`7 zo(jt&fnYLVy#P%PMn?v94c~NH{n+R@@9~*hp)+@8J${TL_!)OwgB)!=_CMkhgO!JTIfY8pY@VC+ z6jE>29UX#!%0^I*2v+bN3B-^+cFIS`xoUJ0#^r3O^@^panItOWAdqrwg(B-dVadGvFv~WV)V>+9?`IGoI}rs zK6w%64D|Jpp_7&~@{{VQhCz9Wc^OrU#glvH#5Lv>yq?0&9RlrgHgoGKH+9Y7#hyS!E4X`>B*x<%^<$! zS>Ex))?gmuoD;hQT?Fq#Sd0mapJO*~-ej?`XI_Q-v1s$4sZsswY}im+Qv>6C9%SWO z)pdpU?u{d-g&)t+>hA82&|&4d$;-903>-tP1JP6TB!JeOs}}NRe$REq^EyWioon>> z@!^!ENdm5*&W7R9f9Ukf5G=u`sb~S5lt1~}MWudw1Y>F9tXcY!TQ#<`F&Mf(99gWY zDyt{EMD`}s13%}QBvDO>&`7RwfLH^A0`4QOD!Jc&tBpwh3SV5PtthVfgfQf+dbw^s zqT7d4-8juDzBljBs)C~(dP|A3kLUUG1zey)8G0%A*|Uo)TA4iy69&Oj-jKe2&U4}Q z1G_xxh@4Wv^$Nev$=JdC#iqYR&X3*T=;+8NsPrF|YtYF5wH_ItqkAYS+AH~e$io(JE>D*$hEb?xC?Im3nrM;GvbrYI_^ zU!OjeHej9hO1YThW5LF zyWumVX>!ur3-F(5=giTgM|)+8)V`bL7)68}8-E98@Mp-pun>b2z57$u zb11zE-)Rb_3qQZQwzl@;M?E7Wr`F6IUrod$ui-bcEh%dn8i;rFI(c$qo1eoceM7?} zeldbYO78GeOBXFdnZqok?Z?pJHdk#>eRDYHP7>0}L-|aL*tS*(hKW|Ifysdje8alA z4MSwtR?iqU>aBC_!cKu^fGdGH;nWC($ym9C?=mzT>N&Kz7neQFz}$KBP_#PqB}<@6 z%6u8*-31G7QS^EdIF!Kpgxml^!Dot}wZk;*4=#rN_&2g-^8+@ngt?@tse0fTrX@X^or8^f8PA5^_cC`>qj5!7&PQ2dDs3hJHY^@Y4sUiI4R zo0rqBKH@I#D*O7)o9Zc=2jwV`Jzi3!w3J~Ud~?yl(rl{9$#2${FzVm`y8qZK24^{6WnJ0 zK2y|_H_O3dCakR6e7vpw<;1`wQ|C99h&0cFeh>%+nU1Rg!4Z^1!lOx9DqN>LDZUQMI+i8-fbLX&DK0LiK9iv; zM{0$a5{J)g1)cI9dE`B3Xo(?28m(DlnxS@s8W|;WYwJGFW6mc? 
zQB)7kev&(PUMt!6sXB)G`y(Wj+*>gvBku^B7Q&DX8>093=+Wc;-MgQc6;^7VN8pF) zjSIvmx+0D;MrRN<$_t_c(mm6T)RA{qi+YJu%{tF=*RF5uBE3En*4$HDp{{-hzD*iE z7>0qw<^oY9SQlh-@|bj9fsrDH>fF|AFs1z&3_#|Ivb00SoECHh0TG8gW$HviHi!}P z{;ZPKRcDZKUII{xeYXf_mx#^3$6rSDv$w14PnCzJoc-%BDkvK$JVGS`sKQ$3*p3|b zCmn!p{sI$*av18J1>-e~(#wxdIB;9tMstaYNii1`<2)TY_xT(nvvEW)NptcG%2-uW z(d&G`Dn;szYe>46$x_O>D<4$_EEu*x7n5l*2TQb2oNI(p@EX zaY8rS}87_NV`zTUptFr}K=E~{qIqPyj*{6d>&&EeDz9f6XN*A`_E|GS$Ry*x5$l~Wn> z&F6Wh&i++Ov!iWwX3zk-PV>A7289jxSVp^RUSDpP0g(nTFz?!y;V#ya{`urFS$!D@SoQ9Wk+ z_*hO5LL3s-Quo%?H^x3y<%Y5DydsuT134)xXVsdbM;ufIx<4qTx6>O)q>BP>C zDI4w-BPso{oUiI1Fqo>*O}8O58qM96&)h~A3q|B8H{0&vK1TOIxH;i-yHxGpA5Qs| zn7DS%6C&8js>{lX9}%!wHko2*N`YFKfpypFyX_w=4XBYxH7m9K*pgeQ7f4B5%oI9& zaC_R;*`Ylgo>isAUEF$Y*T)rlHQak5$`iQ)So(^#Ett6&MQzLrp`qlqqCW|8$!(Vw z&oNF84Y46`7>s(>TmHO3-nWty6D_01xxO>Jsiyj{g^(#WcH0BzG zZ!unJD(?HZ{PKWBF<4C)HWaOYUUEe*zp%E)RF%_4>(;MNrFi6)oD|22MeAW>8(IB2 zsg6MsK?A|p8-#U37YhfpbJrsR23huy2lK#W&Jj)#7$1tT{rmQT_}#v92WyJy8*d(j z9Y9Ez9GL_w?3CR>g9Pd&WE*-@THmj0bUq3}eE03cy;Oc~oUo2jHT5jK#B2RZ|C*$e zjiW4zB1GJ3uNEw|#XZO7Kkbj2^_zkSnHJ>=LII0$t{6#wNj*EUW$>-n^BmGRBum3=B?d6&FVuHyVN zA-eiAo`vLk>OO~*r)#jwdswCT-oAcFU^XWuCjI~dynlcEgbA2;{01`%5DnjzuClr( zS)bNtXZ=a$bho1R%dy}W0??(aHSyFc!fTa9|Db(yAC_*Rh(5F*OZjJ?7;)&;6P1Jc z>M0ISsy+Rd=r5K(Oz_Ouo>Csor%P5=FNo>ct=qou);)XlAbNUfQRtDiy80LL+avwf z^=SwjCyE-iy@klVaAdvL9Ny|->$ET~Cg?LZ)oJGkGtIYO+aqBbX58Vi6N~AB+xrgX z+Z@aR$~JNjZu73>h!D-wjCC6J!ig{`YW#igE;tXK!mM$`?ry z*Gh{C64+*D;k>`X*+tEw`dM+!QHz!B(}##x6gL&_*nw{61MaqeQFuAbuS-iOrR<S@-7zKmRvFjyt6Y?+T4|EsT}Cja4F%(^Y=Gc%_yxmOd~ z+F@O1^<_%`BX)D-8q2Pa_LeVndvnsxPbzrf_I@Qs+dax|>OEU0J3ullJ*jxz{q4Vm zyZM;eI!pza+5PoNQ&CC#*7L(XoQ~@taHt77KD8OP5TK83>eu@uf?Q>F0Kqm|P54+{ zJ&$Tds1Ih(o{a_xL(5tcbQ-y&Vgu*+u2J#%+_2>+1x|P?lk*eO3b)Ix*Uz0hmk}qc zCagm6X1S<^M*=*7_*Moc&SNHAS@|afwd5w;mg|K)k7LDMC0%Bpnc1aycS-Dz+JxCB zegtJ7KVHocxnqeW_kbK#6}2Jr{4kKm~`V$dw6XtZO`*WXSmN>!Ya+h80M?qmIs{F2=y$VpId2L|3M zFG@B2&hbli~yKtzQMaNXJ{AsV=wbY_@-oqXUN~=yJwDYuDWvG7>)2>0Iw0@ggug@Ft@0H8`SWiID?P*` 
zxKS-#x)j`_K_$}S+eW(7( zIA`ctIN8$~e2kdqWU<0J(!sQ%)T+3mmL76-=14`WT&%83VEa2aEFYt=Il1ia$>2=k z*>?Asq@vA^W}PquP74oE@+t1Y66X!-rQ@_8uYK>H?4ji?UsIg6?xV2w&FP^&P`$Su zQ%3Iz7WEN1pZPM8$AUB?Gy|ls*GL;B+h1+SxQeq|MY-UvH9E7mxA-Tn)z!V+ zegK9DzF7pmzc8sz%`HW}-!Mc2)w=Zfp=p;qHu1wW{JpTVIS)^qDvcNHhh7rGRToekN_Z-3bn zFHT6eX548BHj8OL^rra1a?5R(+N13Jh{zMIJ7P%2OP;fj1&U=SG-}zlD8zZFS4@=x zGo1JA3H8ZXiI`?SEC^19$7peUIdz zIGduz>yM}ERE=N1Nb%qRz3)O-j?3UUnZ?WnaPwO?Oj$vL&_YV=0KhKrIt}TX)H+QG zKe*h-z^e#9jtDqs;S_N^bm|0^4xecUzce-;pw8uaTzYuAxkY5e;xTGBP+d$j$2Q@* zBQxD9=p~|C$pa>v#<^0IU2Zqg(aE7CVjau{P&j#4q%E>?Ba5mKZ{S=y9e7Q0du+l$ zBGtqUwavKHt5ZEw>h)!$n)k#qiu%YOSja z>Ujx5>`^w>)qb`QYNOy8a@O+&mXVh}`q=0rS7Tl?P|W-4umz%!oPe9p9Sf9?N;G|O zdAXIz_}0Hx#op!Yi@11ke$HjsW6k31yZQX^tux1uKjs{}*ykZIXWR!8_=e8ivxC7y z{O^)E4|9_by+yzD;L)QsbA(&A;+{2+9czY7)Gc))=vI)d7?WPP?)tc*oEt&^kYn?# z-`s@3^a1!aAhWq8I5F#M4C~u8$?LXnrAVXJ_!sw>rN&u_B(r59!a738w~~Vzs*Aou zOFri5sYLjljEstyocixw@-Z$hj@PmeNw`Ws&5{zm4DT_>baX_#UUN!zo%giLxwa3q z9t!!suQM3YApTJ1!&i*SyNZDu;xo|{q`6$gMu!;-@!Xc2SXl(w2X8G00 zMb|?2R?38wpLhgr(9Zdy*TVa0hxHU?l4c|;=s!?|e->|pQz#>&gOr7ElHmf6{?tD} z|C$JC6R8o}etpSjhn#=0|Eki;SAN>e4zDfCU>5G5~c>g3=OmptE6WhM@*MNTrq%<0KmuH4?U`7 zeeMT5K-|ayin2%}=8wdN?aZkN3j>e+p+>X^(3Hyf@nc=ZVKR;RIL zU2^vPZySWet-9%N^71*0zDA|gycj#1B?4kvVE-Iv!INJwwROa$I4t7bH^OsQO87}7 z_BylKc+N=Ysu&S>Hs%59T!~+~!Ax%(@6tE5;OUBjk$Q=)QPC-XXfdY`4k-eOYLEKY z2Q2tR>JayU%xMv%vxjs?sq2rxMe`Qzgg{ufzO)9tD^YCnz-ynHV47V7v~;~2HQHUJ z1*Th|onLkxPwYm!oLA1#=$DAg2BkO{L~K1K^HxH7+Wak&rUmfri9_ySrOUP0q5Ft@QJiiXKzb@ob>{zBKQfJNveWM^#JpxMua*^tb7*a)lnRFTaco z^+b2tVfj5f+btn!l>V!j7Y`>rDJx7+_CEA4JzIWK#Gc5Tk=O04`{l^}`N+dt=}76c z)(QK9f@tVhbG>)f@J5y;Kk>KRu zq@TJpreNsJelKsCY#uo0o2{AcRAmME;&bVaqkW%hOS}zD$d24{ZOY>w*KaGi%ckyq zr?k{&Syk31mG|xaDI$%(c*XM^1sEJxZSZ6 zh{(<^wnTXj`wMp{@5indL!)OaPVIeTOIl^5_PddJWszSbo?p)zHq6=1N9kxx%(~G^ znZ?)Jd@j!39pm9TdQg?NP5Q^c9otLX-^dLMN(zd6=^o@{+polA^NDjdCR?1Z4)vCQ z6L<5<>_+`hFLHA?4|(dFK`!*}hwnlYl&=7S5EvQ6?RS?{X!yFR^d#_pVY^l5X+Fxy zL80VDrDT9nHPbAzc~fvL&MLn&*iWOrSzv+O?to8g_F`tpTyVzd-TnJ}a=-~Tx=Z^f 
z7XUY+kiWy0p9KgrB?IW6Qo4L#&=*GZA~mj3*u&927NT2DjEVWWEwP)g6$5dtxnJ*+ z5qk3cdGhS*Xi?W=#sI0Y(dyHc6~9fHvOi}uDx$e_Tj3T1E5>TJG!D*wOviog+73Ey zl{9!w1aABwuA+v?hQ~4Qw6s$*t~=QSK8@V_;gra^lfx>wnQQ6-&{D5Yd-I$ba1f1p zl)@hVPQ}~~D9@nY-UCv4%!2F$BgYvR1^cL+z3p757ASwVYGl7DTWsb71=a7fV31RQ zr(icB+#hZ$W+$f$^^s5tZ(_tC^P#WI%usq58gKq{)knYx7vPTe*i%ss?%G>IBJ%3h zn1mfB6}5r}(4?VmXqlK_#I^q2t zYInwtcMEB)@$yk?`f!E;>@|acnFG~>tu^}!+lx^Q%6E45Y#qL{?d&x{5P=tyfpZ-+ zpMSVTYc=!W0ExlEQNs_P`G-OnG3g9fuEcPuGAd#Ug0I+a!!3FP;~uRB>7RLvZWlig z{!rGZ_fas0y{7V~XLJ!!Hyv}@4lj@}iOHbOA#*f+Yd@&L;{OQtDjrz2?^0(j*S}yV z3n_6pDfgI8K&wH2H>d5Ub_{qcJkUle=4VnG>h~!^#;dBDJoZb^!XWAgOZ6)g^>ANI z*zZXh2#~OT{i~9`xKsL$L)QKE0ogdiCm zl;^z03*#S-Uo}X|j4IvrI-y|U4_{D1`7?c9Xg);Zt)c_WwsYCGOb>Go&Qef#epmro zPl$IjYd=mnYJU-$?%&g*>}fLa#>67xHkSGo-cd7JMN%xTQ+|K?rev%$H&U#HRBNen z=^G%Ez@urorN0G?^iC4%U~P=4Ahx7a+fEH1-A|=JxlQ_Z^V2I3<^5JEp6hKlZF85= zG^M>Wl9P=-QunyHxJXVPlYDL>EQ}Qret7^bQu8=SYi7`m5? zj>c6NICFBNdEoi^>+$yw1A`fsfolhecFL24>bm@gxBsdp;Pkn9F+`Q7o| zoa1$3vmI2PTdogDu&TDIt7vfbt+e0&lx96qGxL^8L3tiOWP6mIF;gFzgV|rK>XKi0 zA#F>8=uxWTZZjx)iK0x=#NE#Eu{w46$22*|BrFd7@rQ1UYrW(1B+V?7H~Xl5AZvMh zOR6_IOREo&vda!!I(m1D#EAty2_+W}XK$a}oYK}~T9*Rl97gXr?N!9*Nvdz1=Q&{h zj5A9$@Ak`iRwl8?TxoEd?6jWzOTNiP@8P+1-X78w5sBT82b}g>kVWTXXJ`HCEzH!n zRa(9~9Ubu{)r6PwgBdOxjQz8#IrQGWulwA`^aD?p`;MP7Hg3hQIwqe_Oiq@2!ePh( zsBd6^)XN^1eBQ_5fJ@=UQcxS)nMKv>Yh!*pi@N?~$2RRM&KagK`YxWwj`ix_U+Hp& z{1dSRYI4g+&vN=+6M4>dVVeAIYX7+%BmV3(wgW31%-DqA#(av08Wun+pX%t9BCxe zk$bKzRXGbJ55kbOH~4~gGjd0iZ+Tq+Wb;b$$|R1Zg@p-H47i)s2bDY4+b+GV&db)+ z+xPj|X!lCp{DU_b$cOD$zdtK0#`<>l3|MvcE8+*Ld;jLgxRta{*o&8P)C!FETjHR} z`N-JiMX$w6HzJtIzjMLQXGwo2kt76CVeTG)MCCcLBl62XFx7v}fF`}Y**Y)9)l+8n z8&qZ(Y&?5%>u}R)5DaFwq6-Pbz*Gr4WEd=;TWxPU=*GDGcz2Y!FBt{LPMk;umyu}F zNym{!dFpP?gM8h0zxN#RZ0OU&J)&O%DhXyimz(8{h*!4(kzX^Qbez#HX9$o%SV#v7 z86oquztw#Rl*5;YGjcBh_N61jg0!UVQm|K;=H&3xGM-(Y9k}&aMU}vZIBo=!8x$z2 zMbpKfBkTUe-69N2b>9z81ZLHiz`v4s{9;<$ijX2PNHdw-Tp>J z@gNAZa9rv4%P-Gz?{k|>Y7{ULASBoIbJ0=bm9fHLuViz|=Uu_pzJ;leWS-${B=R}O 
z=cY98K{`qPb3FRlcDbQO8|+LUaQoB6W?-X%0lr~Ez|Q!z$2qAe~4Bp#8DvUigte-;i&z$+fI5wJ^0X~j! zU5Ly+F3hy&y%12Xogp)t6Cquy#cT!*wyLp)y@?x^OOKH_+LvYYx5L~e!%l$y5Z*NrW+l8KX1Fgavk#LPpx2p> zgUkGHHyCc-3pY3mmE?kt&pJK5Ew;_-kQ5#Mgx(QhjD7u^!n1`>0Xt#*)Z4!D@wc>k z8TP8G3Ua+8>(?;;qKtN(zWdtcPZh26LvMw@vOdIZCAqwkB#_E!%vBwzdYTh=n`#@j z6zr0HLO`ywwYVWIZ1`S}W&d z6X|!Lzkt5z?AJ?+!UYYM0k)MG%innN*ykfi`<#FNLRUdDAmme1_3~p}yt~Y-E{^iRr_FwAj<2();C92#;nquFL zo4&fgO8~s4Cs?cn8h-DF5dm)o$%yLAnYt-?QE?qCAdq{(wQv3uq4hUc4c4B%4&e!K z2+qnIFehbXh8%7D`0@T&rGnShTu?AuqO$73w09_%xED^A-E7P}&<+)7rbCCg?VotX z?)my&L{khJWFb=`SMI}%qWTLXES1#4!0^hUHL+B{fUmt*A8~bcJ$dpZFGbK89AshP zNlAUIsWIV-=yA=Zqn(uk!)vXfXYGrfI`)NUdoyUpFqOxj6NS?Vtgz=yEo?3smycZY zLtIvnwb)CMmD)iyp2YMHjcbdY&!{X@nb0tdI!s}a(2gRzj zmJIromcf)<9PA;&8{XGP#%C#NM#mNcU6;Phh;qqMB=5cld;d zS9&!o(sm+-9bx5SxNOisi{VgUT4$;uGOxTGhWpN%jh?H%4cIC5z!;K?o(< z(8>WrhMf2IcB1w}5;bYJO@xahbOOOkH-Gxe5lL$8E?8KG*h&j>i^YPT<_ue);j;X~ zBH$Mb%J!RtxKwm%^77#rrSuG>R#w@rGN`e_Hl9Xc_Ut4^gmAl5E?|Mz{AXIH9K(o| zy|_O(Y`6>7W(2^{m&J@$FwfJ{D7~jnpZ?9DAMUkxAZ+b{xrew&4Y5_-UgzT@ql1ID z@)5}yeJfrGHb+vZwk2fFw z>^off3%m?0iJOc29=}30!!dn*`WTf#R%VTEGZ0XaAK(x9y|KlN6QyszevKF(|H%K> zHG_RC-=XK5pSfs(Whr8S;g!FHzM!{={KlSpy6uN}`Mv}H`F=t?eEvM+Q^qM^FGgxt z6b?J&EJaRf!pqSk3Wt)=w)v`B%Zr}0JNi-U4N=mKVKY6A{gmklEguyd-oR0r2MSNY z_wBbB$&*7juEod+r$#PLdGYSW^PI-UpxQ5AzaAqNB>V;{0qvEs2EL=M-evYm^|}#b zT2Row{a#){f%(?0MllvP%Cz!iQ5PO}R?jztEI~K1N>!zU{vR=a{9LqVKo^-bASz|-L z{BHp_`F<-#l!sRQ;`ir_u9j%foSu?2OHz+U!1M%%fd#L4^N4NCb zKbI|;vZmw3_J&E$oh3(H55XtonB;c5KE9eG4~|U!H#AJigX?bu2EMBb$V+ew7#^&O zf`pI{A))0TR&0cVw$3G>^3!5P=C@{N-^fOM)4F(0huowwrSJDxoHD7FG9R};50VS) zW2uOc6|iV#I~?Yom;DuzYu>l7wcYDbSM}|yyCA0WBu0yCA9o&hdV(p+JwK6T?!cHB z1)~0(#;(}CyOA4{AyE3uO%y^bPJt7CSGDeug{eGT(m47F_AQ}4P&K$pUsWE^*L}|M zOca=v6a@K_H0!dvcw{vsyvtkWjax=P5g}@`J@$wXXusUEPc^P ze|6&vGq|F$H=f_h4Sc4pI5wp>@A7RuEcY`QGg*#vg^mX0Qfwf{n)O-k2t|_EG(4v5 zoV0K$_^6~FHc|0hx5^8H*jO2BaTR?a-$Lae=6kH*kyveS-u%Jvz@aDKt+=!C&QWgu z1|<-4t4t@??EY(V9zimF#A-D!iVf!mfEZ!!5Z+t696Rp?SQKLqU8@Gif 
zcQHbUJ5FwH<8Q)pL}CQKAgIc)bAh(%(fi(U?>~i+Q^iHlAy~^1+~Bb3VPjZ6OL^2(Qu5Q&2 z+oA+U{m@$Bs}Xv5m4ni#23n5|D<{G9UT^<%FJ4e;iHI0&?RVAHZ_w-FKE_c`@2r~g zFC9Uo9+73VW_{MsJYT=_T)mgs~!=}z~%A5a684YSVZX*dFtV0MoAdJsK#1%##% znD}ymWz(#0P!{k&^YT-{Ind!jMxzz|X8HDeThNcsz-pjrN1Xfp+qdfm9T7+}Nh(#~ z5es1f%O2rlxZcZfmlK>U<5r#%xqp!>k6z)B6$;Il;ZOSkLAk|n$Fa$|)oW^M*d6dq zyg6g(c7=?%ws>$z#f^dLM$jFdS|?~7k9R+=UT5C9FXKBI+^h1M_Z}Y~1>TF2-GXWZ ze1ZcwIBW1(I&o&cqE7!qHOR69mipe8CkG7~!sPKQl^^FUM<<+@%tx1JPWYF7=+hg6 zj)iI>L{!@Lay6vc4I4(1`gQ+H&h>@FU|vc?IF&TV|4v())=t$@){@>9vZ>W5uaKpYIEtowD;ywJ?H=5zgetP zj3tuNm?310txzHRUTIe;yEZCXsN~Jq#!|KkZBmxBOC?IOXQV`{P?AbJQnX*UXXZ1X z>-t{j{Jz)u{mymH_j}Io^VjFo2fdc(>#^P!el@ zUGIu2SfO|!TqjWUaupcf?!66uiN;|wMbbfy;WV>4Qo*a)I=@fhH|M2&B{N)p-YP3z z#H}L$O2r2-rhj&J_NU!C52V?lzyVQ8AdC}Ga<`nQxMq@iIV{YEiiPP#E5-7_)+uBC z(S9I>#$ULK9qS4ba{P1prYB!KjhW$cp&hdxHsb3rk{fZEjCAPox0`iAC2cOqHp&{ zpsvv+6Zh_h;cg4g>`uP(;fIN^w8J-!*ca#1(CBXMSC(|pX=%jGPM_dNwCJ@@?k^cP zAvE)2?!J;t1Cx5Q@Q5RS7*0Ont;E>&AXGgsFQek+ak*?97XSRN%f9{lr`Q+yTTN|a zbiniw6vxW9v;)S*n67%tP6+1H15DI7a3nyqEp!raKpv*28;1Aj*>f;r6R5F1nIQ89 z9ytD#kM!D%3=HHOPLqblbuF#@GNhI0Tkyc*g9k&GNX^#h zlR7!XzWqc?Yh85+RKA9hkDW59nf&Z~G+S`pVzO;DlES0XHg^jb+{H>p;PzGSd;amq zvRRXg891C98sB10fs!jdz*jc#wwZZGKXoe>d9(;bm1@To4t9y9D4=EhDX}Enh>uE<=hXPwqqRE;^MxY_8&r9C9tmE zX~3~dXWtFDue`L+VS=lxZxftPES*GAaE}@u5w%Jw_!0zg3v` zscueBPp6HIPJYvZZ9LnM<(i%pggtunxO}o?ZrqIzZ@(n?_}j2@q{lLY`+2GqSs7&mEBmc?fU6tX;X zA%&<+o(yY8P4Iw==MD$}q-_y-C24p3BQ-jHxDduX*0EGsI|0LDSDRU-vrYD>++8GP zNcUz22J2pRQ^j$y=8?Lar8hBAkO83$cGurh*;RPOP^Ju^Poaii5bqejpCyyCe8cag zA0nMX)suRYK7;Z$1Y6K{ZTNQu9QhOL4!#UtwWz~fT}eAyIy%wqb(-FW)Zu%%sGM%T zvM>%Pa&pv^5`ABoNi>$?VAo(u(la);EbTg=jww|{>G8loiT{f^8C!B|#xg3?pRW7> zfw#BiVNcJT zsA^|%&NCOonU+Ct0XAVYzmt^98E4~ddVNbN6D77|s^!tp*;yjAN?-^3n%XMK%gW|c z%tGL5-?MUPz+ROLqF`jmcp;C&RH6FIFXygY(W`HI@L;||cLizN>sOo5FQ-_k(P`hN z<(0B{L-Ls&y($He7N@#O1LqJq9X=rtYdM=M^m)oOix%=J2hd~`gCCS)A{2)~?;DO& zHf;Fz#}llK7mSeC8RDTcFBxJG9Y}hh=E}Xmy(dWW}<+Zg@B^S5Fn<@B-@7qCG`nQi2AH0;vq#M 
z>ub78sSy-2ksd)(Go%U%8E4EqQ7#k%z=*ps0SeLbLzuH)x;y2ys+nNtFeS;FHs5$s zlR|G^L!1SV06wd^e9GyxuP#dQE9$o^_|z2++48%ue4Gwoqv_^kb+G3YmIeoChQik8 zH1PcixRj2UEj%l*hHMNrW=Mw~4G7rOwqtP%TWSWdrfk>`R4h)ma*zsCDq{eVYi&Dr1EnmTfkO5z}na*6M+U|6&4)mw0x zXBg9ADX$7tf9%s?&rEApQ>?do373#tf-V?2Co7LbAk14pFZ$kQ2?!t(gu6f&7^*bn z?0LSprvNFWw%3Hk`hCBUYLPex_*$>TIasoBvEcm9;QF z_p7~j_wL>Ea&yC`6J{_UHt`hOTl;thN-I2O`Gx)hZ&e{AASp31Oyls7DcX^DxG z00h$_7!WFk+6x!mlbv+^I>^js4V92-D|8O0Ni=m#Bkvb@TZrk^u7RdzLyL4<)HYG=pb2sG z`0?Py1_4SVMj$9VM>Tja{Z*DNi3pSpO-e^rb9bp_^Uzt(SDdkL=guz9(yIM*Q!)$j z9z<1V;NZc}(SR8nQ=o%RWX8gwKk`a<_9PT|TTgsRJqN;}12sd132W|A2}8@D407{8 z|B<}ytGtGcttGn|8CEP@v-MjZtH~>)g#(zk?hQtE7OP+GkKXbFA zV-=MzV`Ryls83xp_Gy|8WEWi~HVah))sjtVWXgtzy1bF++~+8|Ji2jR)^!}%`r>LCaAgG9jw<7_x_}l%Hzxu0n2Z8t+mjY&p<%E$diyR0WD#KhD)sR!LH7|s)gcb`uQwz|n zS5!lPbF9iQOx8rr3{cmyQsM)6o(-s)sdr47JlS-})G6d_hMvK278 z!U-XQ@k=?WzllFC^d&oSU#z>0i>4G$tAm2=SOD*-g_gZju5uHL2x6DgJ^E>9=19&i z<^=)>&*pyz5;kn_LC)8Sg=w3)z0gP{2Vj7@%SK9DF)3;@CBhUcHt<2`JLy(+iB-Sd zxA+Qo$}Y>v?qFFCJx#cfGM%ri>*W3%>U;ktI$I4NSZ`VAs|6Hyd_0xH2vp5Ol2Ch! zp-i^wR~U?t*u2!pTu|O`>ZFDb?19sBh2P=r8Ym%wZk3_TW%9#y@dg?B$s5s|pr+6|yoHQ`K{@7^}Qz11Tdl)w1}`Ubg!u9x*V&sYi`&z7r=e0jGRX zCs4R|v`5XVM?*kMRt3<}sZ+v?2L#)*!xN}|!IwYnGF!S3NPDyB0J4iz7YT;k%rJZq z)s?gZrMwy82_iClDMMHMi9|iYcxCD7Uri1KG3D5n;jdO%$BgsIwEDvt^+21Lo^T{D z#R~nCgXZO~>9VSeCJ&o%UN)ylCJFYAw*S-*?E679PKGjM`;u3$vG##fKoL?RDrZ%P zV<3X#@OzGt2j~EsF7z#5NmZ9OWPmYT5(CzMd*gFULB^oY$#~EAu|Mh~zxiqD(!8m- z0I8l$CRQ4@2&~1m4PTXbv5E20r1*~SAV(DZ^MX{S-+X&~iHO(-PGoHzE0IV*ds+O7 zbN7p@HG;~Ash+PxU$8S_wIX#tk6p07Dr#{0P(a;2s*4KatWRVFF>CW3S_B?YtYZ%3 zKzWjzipCorN&jv?9z<}fFl%lMg!i^KUcaom)1{sVR%soV zDf_m2x-8QamDP51NIiX)-M26@x(ZldP^!dKN%-KwF*B`wxvB8rZ!80U*-$0Xht=cc zjl|q($WTbRZ>9>y%IQgKTb(2i@Jf5sfi*11)LWn#p#R>E;&ae};EWD1FnRg&^FBA# zm&5q4chj)RzJvfQmCTEy_l_ncBxKYG?%V@48mF9{MlpVH=a}SWi!u@ecGaDXHo{V9 zD{O4mf+QT!LLwGQo1GLNCr_JB;b3L^SIbI1W#14MK$D}dbc6UoizKO7#`su-ZFK>*zIC&&tt(shFg?a>!Cf#GIq@<#bIR@?eJ`^i%`M@S+^wg#I>2^inqFHAO%ftX zQ! 
zsn{Mio`H1-9xL*UV3d@~#F0?b9Ko!++PCI3Qd z3UVl9-mqy<_*b^R83gfCaDdR$Uci~A-rs!qwt#k_=S0y~>_2vj=dV!&?X|oRp^7&X0lPvkGNOA)X+>QG{#5nDFUSw@MQ9h9&wZ6DMF=*v@q4cBsSd`Uf5l zO~FT1PhsUb6VIwIqX(O6rQOSf{B}0&0F*_|?NX?gKCA#8%nX4;lV!i7-cR7731i-} zZg-1;`xv>RtdIyw?9ZvW8j}K+%9NqiT9;He!(0o9NmyUOHtN|Y@M<@&$}&E4pTdjh z&szmgwSG1n4%@*tVFtykrM=84>cDLW5QMAJ@Hx}v`vlT;TeHPl+-~p@HO zFrU&XZ(_qyaoFr>G= z?{aL;clONc(vDe}2SGo(UFfKzvJ5cv&S|4f7Fum^W-YEc1a;d+6W|K0EZy~s6BBs+ z(Nvr(4a~Eg_ZgQMR|B7QQgeq%N;l?A2+estnO&a$ex2}?aVA&p<>kTujUmWO^$DCG zZmlh4?G_PJvMJ%!MeE(k8KEBF`+7~*&`2W93F=kE{*Q8RM0UAF|Ldtt6|X*aY`SUf zC#PXgeLhV|Y0_dr4*c5!8Q597yHyOgjdpR7Q}a~RwCE~5Sdf>=-`RU+#ad-X>?#5C z#;^sopcZ`PD$X0*JXkqG|FoQ%^_|P4@IU_e1E#Ym&pX*hL2u&Q-@K(Q5Sn>f1Du!o z&lnu`DCg5_c&aIYR2p2su8NY3Q@tuu^>!VF4v#-Hg)IjJGBZne_r0-M!%YI(Vp4~) z1I%)@n3Dw7%sn8MeetsD$~xth1OJHA5m(G%90@L@YmT9@ z7#{34^TdP6bi#yB63uztoRlXNUo~6ecz*sop&rn)!?keft|_U+L+fI*y=D24#{J{u zNBZ=Szpy^}#i7HiH~1w4+u7B3V$l{fY)x9Xv#bFBgM?`VGHNd6dv2;XN**$A*c4Q< z6HfUSUz%(`&eBPtWtjB+>(u`a=r?h}|9S86|NklR1Xlf5fh_+0I<{Eb*jzbenL+C{CF!k0wwb>k zQ|&?KPET1m_xtfj2jQ+kd`4}(Q9Xo0G_bAGI7Kj{0d=(FH?We>(AeucLFFM{K8Cnq zNG@_jnpmK92aUT)5iHcO3vR^}_K=|dZ{g8kD3HAHK=VTNM-mnZ zfZM2uk$eck$v&W*&gX4am2I@6i%nCfg8*G+8Bm@IZbuM!cP)B9gEI} zt>(*AccRFBg(ffFb?Nc)}*DqQ(GqWLs28Gmb zXX<#;FSCrk0EK%E$Fil~s?%YEsHz&K-Zrher!x9k({@CYg<^!np00&bC3F&tn0N#z zt{e}ltY-R*?TUW3$XKgZo$c9w9y%8-VjSKcl^_Oy2>P(8Tzc)=p?wo5;Myolop)5R z)j|bBIL!7(M!UYU3=sWOminq*qT~`MW=z1ajwZH30Ff$@)fAkY8yg&1g;Psp-=HdS znh$YA$Pg7UjZ4T?pU|**ZxiVKO)sw7P`5nT)HfmI|_6gxkCJMfHSz zkfghFRFvXJH6B0S&o-0Y!G%eOhV34?H`H|tdmUeiT zMp@8hb>tSegUNJQn=IO1-M#w@tx}9%R+;L&0r9S$Baxa5`DGP{_Mo|bR}T0Kl&+kP z8%q+7muTxan=>L@w(RT`E;*{&rXEhRL4{4dk;K@~IN9&Vq=Ku7*T2oH(2jusa&?ng z{P%MTZZs!7m>lNjol}rgrF^wn zvK-7ti)ul?(>UYbIYjo+(HChcgJSNg#JPU=^ttCe$&?@4UM-Jv&0^QVDUqos(%%DLd+-*@lUdRHemvkiCA-Q((1){+0(r6P<(g{ft@etuVKU zw|}Ury*lif%uVgh)7JKgj6k=(%sE{(Zw22Fi&KZ@l%k+rAAy+E(@pD|hr?hORUm{j z+y3nnPkDduiqmW%91egoVbj2#fT4QlTe-aMv5L{M(|oF5vI5(kQ06C?%zz*Va>Ax5 
z@mBs2wQ=SX4f8N|+ly!0U&>2;RTj|^Dr(pqeOppe&rAfSrmgSfODSTG29f92F=JvO zECOcfRgauQ02>j=LV#@yAYVLPSWuCwQ&|U$foPR1?1knHfgX}kFIsw3kWy@ogeUCo4a`lBB zVkw8MbINkkMQovn5B)|c-VSmzuUPtJ%$|cBBo9_fkXf%pRlsg6WO4#FKdkXJ1CU4C z5!xodP6AP>--Knjpmi|xX&o`cb2S87A+`fvcW(V4L9#LVO0p5D6>PdVRMCz$NFMEq zI>-2ig{@FPvA5)mo2P$sL)`yuzF2u zYh>MT6ZY%IXef826l_oXmsG*T^0`~H7gob3tE6MpVD=ZmIR z@5UjPdGevUx@L$n7*up#`t_xZig4Zu*4Z@U81?A>Y){+i8Y%r8#khwX3er~ifJI;b| z$#9QlG0T&#Y_VZFWvLujqVI?5y5{yT4%oO6|Je5qF}%kvAF(7yLLwi_rd9UMYe(gy zMhm>tS-R$J^u zZb%64^aIbF7BwMLQ9iJc`invIk!_A3v{>X*+JVz*;UnS`iF>-5d<2rbSIX6^joniW z>!ac}&ziMMZcdsxTKemmj3RjlI{|RGB`#D&Mly+jElSf)g@5zp^n{YXH`0{38n22W zs4u%<`<=ob#g+hPmAHE1$YUn&@Z9vW8wKRG%Tx?`i`OBfJOJiIdAY5&`_FOf87zVZ zm~r7rf&1p)*zmwjyiAcp!U9X3{ONnEwI27H4N4BEr4K#%~=f-Sf3SmM=Xz-9bUMjr3O#( zh!xjAhP>LHl?X^mSbKRWdc=SuMLm4^XB88fAZ5r_qsES9%~;3dh61y8Q~f!c1w(`= z8Y35*X*|(Jz0TY77Pki;rEB-@`vXq1MnJ3?S#_--)%0Aag2J>XKHmyEfTb3(&8%a_ zEx-Jl*gSgNxUG(kExIPdo;C*Gg<~KLM_g7=r6TK{`=K?aVXR+Xe0>*@*(Z*sb6%5;NsSB%Ad z-U>x;edHKGIBd;nNoW8hm3;<-6LWHkb6#+m!BwwDiyawMZ2V)EugV2#S&$z}ORd** zoTyiAuI{O{4?jXyL)zu5AOH228nQebJKzXh7E9q=RO(TYMQ0el07&ZsW0IB@9vr-e z{J{YfFC%8l9ER0jaW|jdzyIU#VfOyE^--S+I1KC&f$_i$;nS+xYSX;6rcdCoFgeOd zs2vsIm9NbrO`V^<{Nnth)ABgaU?V3n0>qEHt+*pMKmU>;vrZ^`sEoC6dopV&2-|yx zRLF_D&)D&Jy2BMDGG^>pbHF;>2;eE~S3^c8 zCndG~wY1vL*H_^7+4#GzDEJ8iHZ5R#zIBi!BQYl%6DpKa)l zd8k)3x9@`*RXL{O(CA9k8mfBSqehx4=;5lW4X~r$@e;-If8a~5f(d-X^K($x!)|Y9 z{x1}I87&S3A`c@=)$fG0ns^fL*G~BEwlxfrt6*?UHBTU7p+jr|6==&wGN!lD?`JtX zl*-g0??4A7mf<>Z@+#SnjPB}q;sy!hCYLR zK*Hpy2VFQj<7yVT4?$-c&;T1oU;EgP;AB|%qS_@Kg866^T(EZ-k{3@*7`o%Xk6 zxtf;86E?D{8&NdhyVAm9M?}akqd{sMhvt=>i-xeKqNA6hu|Si28BD0n3b}gKg?pfW zM(Gv2G3$-o@Z{R^rMic=$l^q3vEZ=mQ-B%>Gnf0lO}ciC$2em6aE|GynK>o8PjvN6 zexMT&($9AhKRLe`4YH_9mo6t*%~~)T9#c}U9zDJknqN;!vh{8R_Hi+R&`N-%zO+ zaGFKGiXLcAg2_L5>eP!drkULeUi7Jb)O_9N&~BG)-Rfl3H#eUN9#>slyg8yv{9>*3 zq%F}%k6(2Qgbl8Upwc7FJgYs$)w~{n`C8WbxHu=q10*j%Q@(9By{k)0*GD`4=9`nJ zv18u)lqts<@>A;>+Bsc*q`9m21{5T{EuYSyH8r@L7vL=?FNw3zdWB8OufP6^h5^qY 
zBJLPH>d*bct7~g_oLnHf!&`p=DzC`Q&xeqj#Auwf4n=^woj-T(`=hVc#vK2NF?#di{K=H|MBF;Z5xV(A}sLPv|O}7!B@IFee-M?I> z%`w7G+?VFc$B#FwgM8%3N9GPwS5HK$>*c`B@|+d*HGCz_w`O$}D}Pv#n8G!~s(St+ z;67h0q)5Xw-ZTmf0?&I;(HdMeFh$f?bki!J5|L?PyA1Y7=4{fch&?dNHQ}%TQieI2 zZ!>yronpeV-%!2uCP48Hw*r`vl8PI8Z*BA~2^)HwjT@b?iU6cn9~s{pV7{ra31K*f zDTqF*WG3#mAYHW>3qch979ekLpsDR9&o_^HkGz7%cv@WzjTTsG0%?&=UBD?4vGzqm z-SbAvYVLxHCAM%@L+~TL9KwSOScUty-8NoLRu#|3--87BCqTMs2&NKEAJMgfc-|1! zn(VY31mtvio$?Pzrf(XCIn@#dK)}cVlux5xC+36qycKBDe4L>kMThdJU;-O5>ghugn+0;zR>0wei6+A%Jj~a zD&!Ev8T>a{|0*uElTT5K>cSR&TosxKY-+>R?Z4)(y$-qqEkP!Ru zCF}6=3LY;G$NtEz^pQ8vPRkb_b2#2S-?WSj7w31-WQo$%&Z&kJhdw^nIRfN)v(O7* z=g49!9By|7muM|`I;VBAjnxm{6iA#qdEv>O%H;m{d3*z4?@Xlx+Ry@T2F?+E8mDc# zPx+4;2?TozT*$ftjZFhhXvO)RGYDKwAZx$MI&_ngMLeYZ%^N-Vi6Gp#gehn4DYn`; z6Bft@Tx%3_xg8xBumrX6Skdb|vl+qPj#%0f3QRjZT))L6Wk(y^L+<@j**bf-T+nkh zuaZ6sIpJ9x&l)uMY!(y?%7nnBfo&Mzl{~uh2#6YJ0FS&Gd?^JSvyry8HXf+tJWns;7;snfzr|=EPJrt_cg-X+>CnqKr6gF4? zNGClmw9dcs?C-O^yu5yc*-VNjPdlkNEw7FgknPvibHJw|;TP19EtB4aF1*U#8OQej zSTdDlO~7M*`MR>w6q2HWfz!#i3Y=n2(G?&rAVeW*lC{oEcXz3HHm@kF@Xda5?t}aH zh0_l!b>S5|Z;-hP`ovs4MJaF7s+`$Keu8~qa>H|KoH2YEMVC?NBU~TGMvqrYSpa8r zz_qXqROE!M#$55B9U4hj`Xk1Tv*+eHgbeJd{py~_jx-4}!hQhTBMs0^O8W&}ZngWX z!NJiVu&-ba83T5hqy;Ii+1Wd}c_a!m_E{Ixp-STt@CK8d;^G(rDVdK{P!$+EMSTqI zj&n_c>cojxIF2Y!1HudY#a%QgUHnB&Z}4>cHcmxue+S_IJpR>VQx|P6^w>!+@;Cxy2TO-?f55H#al*5+x8G{=}8vl%IF0PH5p0*uJ;#b9>+{aH`?l!~oC+e*?dC(KR?Z^>C2E8DbckS>Hq5cJ zi@Ldp@g}j6jVz1&>ap6sjNzzMU21uc6mF7t)jCq6w};=Z(lA#0${>8bQ5=GM^cb{f zvur08QGjSw*T_t@hJ=&*BsJo06<=6wtzEQ+XM~^kWacK!w_z!I?B{Hc#JCVGZ0C;m z{*Bcv8&;8w+c&zmfBM|~Byz>rE5>8f#FKL{;tl4C3Y$}p;*M5DWQxg9IXRo?yNl?4 zV%vw_-~;l))hi$SnTj7C2Wm>FwxI_gz>g4+EN5fU4C8n1s*@*A?l8(&+{dIe)~B~u z!~RO@%xMY{rJcX!D!;|Z2$nqKccx_2_1I_d?Fknb1+Pe0l3)E%c1WbCLvmNnw70Ut z1jm*XM;n%0t3vwnQbQ1;5Kzcbe)toD>&xQLUA}fB`*BIm!m0AXx{G_%)eawtE|6w=w3? 
znn?BDRXQzVgU**r&=Yi^50L~>#xl_gz>oRpFb04eT~SuH!G5zT>Jj89doW`}2z~P^ zjZaVl34P=rwGS(gs;ceS#}kPBJ^5Y!r)a!WRu}qAF`baFW`>} zVef(~ad8GkS$vF$y?yU^4j5HUk$gZyrv;?;Xr-Yz<<-E{9ZEe&W*R?!Je2^SY>1lU z=4QO#{@f8xmCqL*96b96)w}jvTGg76YfCmAVwLl`-MNVeN>Zi$TlL}1eKAl-uwTi5 zf5rucgN+U-9Efffw@Ps(*C|i3T6d3QVuD@$msEHK^Fn!G@4sk>-n z-5jTszJChx%dSaEF~0pE8t0GkH^uvyN!@4oVlxE9N*F8%#PxyxyGQ)e34Z7izF2YDAe25nu=K zfUu(Se(x>pVCbd7ugpeKy)H9ocl&i7(H3Wz94pxHpav3BnHhvQz&RU7niiA^mPMu$ z7a0bs&!BZ_K2Cy$*$+>w{R^2eep&crjdxa7 zL2awW)3X|ZkcW**PM%Z~SZcYcr3>H7NRF;Ew_>%tyr;fobyIN zZy%GhS}+iL_;8`UeI5B`)4AzzV0mhIOW||RSjFUr2K&AH_a7NNNr}F0`QZKf_Y&Rb zt<)*H{iqEU4o6O$7(91RZ4eKGS#hy#pT?W=Q_|kOwhD6j$o|pPOpIsOZ)&Xw2?Xir z2#kq}OvdX)G<+i{eGE~oCGAqW@umqoLoGg~i|rsI7_9K_BpzB^WE8(aA()#-asCSR zZ|dSg>mgN*;S4Dlv}_T)>)gn!)X2Nxm#AAJ<4xCZI(qC_(DWCIo#1fcbP+k zRXn*AB{gdKGsiSEv{QEO+{t^VsH(_0&VQlDKYH|Ro}k}`uJxn7+Fn;=l{H%-4s?xB z##IzNfZkX@&ZWRN5?UfkN*DR1Ak%X_)Ki;wJqyix_a*3r#SrFwjn&UQzVOSrffpoEuGi;vzp;M9SEolI)F-FqgoJ^rqBdw%Te5A9OvY~jT5(fT z`~1>zWb0Ab02eP@@TfZm{e%iF?T@~{2%am+iA0J5F>htc(_y1WFH)4p_P0HVCyBpN zzM@7lB4-G#eW$<JsOHg=C($5?sTk1( z9P%zOGbGMO&ijsWb-{!7R-e};C0RHk8om)yT;rNQHm?!m^B1$xu=-iAt3W#yJxA%U z{r%^deilG@lT9-<`@%wC>6%<{h_xzvbc^_oNgoHj6JJWFF{vuq4zlgov5~}d-|>MN zsYU)Kq@vuw;hKF}zNTtUBqh>0lbaaAf8~CD!Q78^1!p9a`k;SD+R8}nl}n2x1Xv)$ z8TpEHUW6Nx2UUvRtSj}PRvwpBeT7Yg9l{g^XK63+OJm+>Q*<-AK#%q;UaT^(Ydml# zZ`Rcr?i-jjR4fUz5VM*g@=-S#twddWgDksV(EWktySh0EILD%89baps zwz=RYTWW&M*tNzceNi6h*1fx+%p#+~?Ep8^NL>vOfV`YH+k<@cKEg)3d16>!=olVN zd^Th{UT1Q*u3bmZA8Wi~q+VLExphl_r#DHReE9zCTW=i@1sfiQjES2j#3i%#-+Wx7 zz>@Y8K7}Z}6YLSZ5oP6=oHv`xYLBXK?&bNXIG#HLNB`GV9gNBWt23?N*3gt!q~OFvT=rUt&Ym$dkqZ<$&-Ry&k^1SY2MPUy^lmzGj- z(JzXu42|F`cSG+o`MFUnVNaZ(#d8G2_iA&h0-Z6MxuuoOfh*pNF{Xe&Jijup^!}ki z;2&9Vyc7_Vq$DV<+d-U)H6+>>LVH0dT=0q8NO1@o=VzpWZy^Q!2UUDi3v>U|jDafM z5NM!vk&)+}dD>~=~2;_R!h^<@hBDjA=Li39$9ACfs6Cqys zzp3V*2n>wABs_Fr_L_eVxS0}ONEt7a6jkTu&YJX6ddP(f2<_APKWz}jO?IYsc>+EC zD*#piY1vuCvECNw6J{8%nRMiIVJwf{7;9MNq50s$gtu26gE)jBBU%if;5H;kF^N%-x7d}XApXJVNn&`skP<=)bD+|3l0 
zzf(ek$_q&ZD@|7u+20ly@#JImgtFY>CMEN{u`O3676J~Cr5LToQm2*f?0kDv&$}cH zt%cu%ni%u=dT%mj@7}#}A5d0aEXW#btgS2P(_w2ItbCCWoly@*qSM}hK|ItKEzd!Q zxu3qk6H*im&yG_^mLCY>G<~ED1wriEE?jh63p=?b^d)i`)^-v3&S#8E`tS5h#-!YYf=#gE9DC2-@S*f)`@vY=sXcjaAryWa3yVFcgtm}jZF|H6}CPpMVUHDVt z?Mxaz%YmUNs34*1$N@e#-!lqry-q|Rb zvA_2nxv?N_QCl+hr-D86#I|77!Zo9Y4_`-x3{C0yBi~WlJ`>kdEGe~)O9ZHnz=5C+ zwGUNL5cXE)meYi<(pUeLC%&pnV#ZWJp2 zQ+6b$9ceL@vkavYgeQ@!YJ*VBVF&o#4eC*wUZEhh4g|5?6&B5R`)FpXdXOmzPyuT& zC^?_@9ML|N09}*s3fOPC0Mn_kj9xQhpUO!yL@l~CQbOz?G%m3Usilcq64P&T695hG;cYsZRfo&^th|O9V1-(H0sy zL35Cn$FKw#*SqEzaP)g45;)S}Te34vI!QByg@tl6t=@Nj9XqnLrbsK!z~m@X>>yjJ zV$TmaqnvuJarE^^7W>kQH^Ke+0JyCtv zT`TFWHDu|vCEDLD*sErlAuoLfZ>V|x>ds*oAKARumPF}G#*H2=^dvcS5q;M!UVJd4 zp8xd><>TJBnUMfOYyg&T?8?c=?Pz{C_4Km3up*cV2Iu4&wo0EOW-SUR2S&zs%J}4I zei^H7gKAX8znXUWj#B^ZYBAFF{-xE2*BDFkj*D)avQtDw0u3+aORA=gRY)WIJc+aw z4V(0$ePN-3@5FERmYYcm?hIGOwF~dxB-sKX&(UGne}FG!QRW} zJ34j2Q!y}z13V!(hRE3vp)0>IIwyC;HPK)4;Io@6-l1Elx_TKPwL61y>rb zf;nCN@GZ7J-tCy+PNJ4fUG|9j;j%Es!8lfR)vYeJ*R!%(m-U~QeoSRi?bCCqENCz7 zIyc{-(Z(_Xjd_-i;b-Lp$5GWOcjN8rBc*~T?1>6EGVSBTf3sCmI$BcHD|{SI`4_kV z(dImSp!{R7JMQP7Xa92GH}k}Iadn?gjF3LV2PIP{FtuKqtY(DrDu>b3?vBD7noG)$ zYpSBI=zkv~;qvgM?}2IWpgOCpqsIeAp8LGeqPY4aWToe-E1zvNDR-5wQCV~~Pb~@Y z9|7EJGcOgjCni6OwY1`md|pE;5|7qt#}N4C!nX-GyRCm=BLLC^t!YWqKg5;nBG1KA z$<%f_(;NPbssQl!A*H=x+zgB)$WvJaX^?Dg`_(D^Vq!8QCg)|YYBb@T5RUP(7= zOBNO!nCFaLWB=P%7Y=E)Jd@SRZ_D0Cf(v_o+wk(q<*E0sn@ku=1rnib=aroE|IWYW z)|1l>36H;NRT;kTYcusP@6q>@z@@A^)unaYfBb*``x=#Y-r;{+o7Mk=t9MT9KS$DX zvaNl8zFFY=#;Z}me3E2}<9DlxkMlosn*B@%jf0g;e+a}v*sgy#8(oc4Mzna}l zy^xWw^Vg1FKhFL7M)VA~f_b@q>(| zYu??n|Mrc`u+D#Ur~V#||8O`tf|kEdaQZo=Z^_nmUw?B*@2&sXEi6^q=Ovl*W5N%A zU!~s5|4rY)QdPA5*##L6Yu+tS8T8LTbCQ`qKfbp1s{!xp2qV-hPnr6qg{G+moUKb% z`$J=&w%@-y=bj)w@LCn!G2X&Vc?+oF?BM&s8E0@>3TaN*HX?Sv4I{5Ju4O(f!$WT{ ziG&hhd%fq$Urn}9|555rnK7dsko7_Wnz8IEW~pp~^;eV91p6QAVQ9TXxX0$22#t?z z+X&3RU}`fLoI`U`uMGWs!052HLM?f!iWXv;AY}-z;(iU^%*O(;M4j1mq&4i~7^hl<>2CUlEYASh8`uJ6c{8&A>}W zz0MPIaE(M%&?`!ug7x)c8&;R`P8de|#K 
z8^>7YzN7Uou7IvdAu+?*=V9y~JOJ^ii?|-Vp*I)`QOX$_n_uWFgeoe256Tk_J7piO zLC7Q6O_fNbVeKFfZ=SC9j$oWXWIh8pm0DI>34NM#C++Xd)y>+C*y2kDC(4W&-5F#E zzVh%1Z)3Q0 zEd1NY#;DEQIF7B4O_ad*m1-B2&C@@JR&NP|eItgcHNeYc>6Yi8$Tc0w4&h>R&deqR zGz{sNEe5#-e4y&6fuW-$MaNuO4YS4N9!zbO*MUb#$cP6aFX=H<)pDThpNZomGODxD4^T#sP+~^ z_^wgUfA1!J&^W|+Lh(`?gn^39Vt)aw6|i3Ffca{h#V=RX(OCJ;%de6;8?;+h&mh~# zGU24g^oFg5iN48lJ1buaIl>|NMb7fsMOstuni<^3rNm8!BDk(hnAjY8bkKCr)N=(l zzBTqsXih47to|)#edA@WIY;S}ugBiTf*ZPR=7YBGjo#u|trX|lHEM~U z+zsu9gT`&02|b;hY%CJz|Cqh^?wmw>m7<^2x%tB=1wCi69XTK$!i(=gzbg3HyHdhH4JfB&5> z4hMZcMtlu(UF1>yTQ%z38QrY|*6g>os_zyYJLFKi-A~O)F;?{#)tn_upZ_s3p=7GB z;tYiEo>Gm|@9o*QkC*czJODHGJeCNM{@9xjmX$q?V#}X@uABa{@VZHP=BC~Z#gr+Q z?lSB&7l~>js zh_V9UW^GNKXlggx?=vQ+u^T)yoRt}Z1OaO3w4)l6w7emSh{tDbWM~)~FZc>!TdVqf zo;sxgY1EHEFWy0!L5gyuw!hUwMQgpo>UpDG!qcmy0;$)>1bjPX7^(mQ4v9)&r@RIn z98H8ku}wizXx==O*T?XnLi+FE2rKEFiJ-zSS8w>t7C62M>85wSQIn~9msR!YtM1`9 z$nXQ}YF0@Rs6~igad2Jsg(Vq3Iokw0!BpuJ1dFOiSn4QZO9pcxypkdl{f@dl5^o{= p#eYH+|H-NUjjRcub+kx26yslJIvB3@=|p8Mnx{QCYWAw%{|7bOad`j$ literal 0 HcmV?d00001 diff --git a/media/images/cutlass-warp-thread-tile-structure.png b/media/images/cutlass-warp-thread-tile-structure.png new file mode 100644 index 0000000000000000000000000000000000000000..cecbbfd44a06b44acd78d9926e69fa4c510fdbad GIT binary patch literal 179689 zcmeFZXH=Bg)-{T4n?+kO5fqi65*5iRm`IW&6i7A{N)$wLwhf2`2_ga_D!E8X&ZU9~ z2qIvKfaIX$oYOZS)HA*_-e32Qaqq8}arz+LSoPH2Yp*reoO5l@^GY)G>zUTm(b3V% z$(~iEqg%&CN4F|>?O*seuBlq)__pGvs?2G+u&GsVVGun3?Rqe}8MrH@K$!X|UjC z?9ihH@>Rhl;<HqVCtjPa^x7vGVcxoyuD~o!gqa<2Bm`^|7erD9r!EI_# z@7lE=?(Xi{*`l=FzhAp`0&nUr=H=-*GBrhYU6{?a>67+lsmEWel3dEiOH_u541LeJ z86n}~I^E`Rd(Gy1ckaaf`3UZH#K*bHEiC-?)2Hh{KfieYV0*2RU`PO4*9pgQOIuqF zVgr}IQatd17QN=ChTP^yK9DHtGY9D@nko9RO5u2?`H%cm79o=_y|e_2mck^*@shLB z($7vfP1Y()Rt6q;$SI|lXX`Tl)8AxuJ{y1DN-jHTV)yCj+{>`=?wUK8m~wCaP};6v z@SwjwZRa7?DD4cBB<+mZp4bR8Gc$G87Wclolq{p_)AXXRD?*MY*lF)RW@~L_rCRB3 z_oKTqO#U$s0&?qLmn0&lTb{aU}=>PV*v{aUM 
zSHsBYC-GPY)W!Dq8%TfWA8X9ANYPArrfn(|YCF)NrK6J|zPyffZ>$jsZ@AMkGm}-L zA3Rtq7`k8Tfp!14k*OB9h@;m&_1okBo(dY*#HmEC-o)}PK{KVqhj}N@h4Vt6)$;8} z)-iCNm6D3rj}0_%osptHN$evQNX5qUy>a~JH^(xreDGD4m^qu2!K64hIneUC>&5Rk zW|l$>rI>y2-Oa`NnWB}}jd19|*U->V!iUDj#@gE2qN0wnzyo}Id|4JP&m>$NyUK$| zY4(Vhwe<9)0;SE#$;mA)Q{laN(rT&XGFpOWg7};JTYev>H^+%jmf-2_J>T(AQeEOX zjY3KglsH!T@uMkLH0S0I4c?3gJNXyKd(>!Bj-oEJ){U9wq!yvj9lY94MQ@y@t&sNq za~ivE5gX)l`*zWATMVOzcMcoy818v76oVGe06odQ9Ky>n8>pMG%sdg2`^b9;b=M zuSq(6U*aQYldHX%gvy(n^=SngR8&+hUApx4?b}L!) zGAJo2r7+dxE0~$}XPDMSWl_()Fo^r zNt{{Ibw1?iHEWmY&APg}S3f;H6sZ@l8m$_Au59o6?Of`wF1|$ud2~?arSr@vE0R94 zX8|^1>_^qd>@x2FUuHq0!&fVwc(334lwrb;`S=k$-0j0CEsRsQtRg8;+#cK zPU32TqhnnkV{s;=uf{uTk)?tt>|_P2v+ufO)|Z5C9?1K;OS)K+XQhR zPSdZ6_$)OvM<*wXrd%6TT^}YPE`vhn!S6XF8EupW8yg#%P0!E})w1o~L&-%^ByPOZ zgB`pLiCSsKwJ~C1Vm-5g(gK$eCcQP$@;O#r>Qw#x2sMPXMfJCJSVeZ!v?KE0iRhf0Z)YxfwKPaV|K|E|LC<8Bk8)K^2I2I?LbYi z31$hg!6AFRoo)aA(-{i4{rvvX%9}-o#ZJ&7g9A@}p^*AMu)5`=%q2Ul5B~1MBBt8? z%p|_LB2=hvX6#35F`l|USud~sy;s76&QhM5N)MzlT3#^S;x7Uz{HtOctCfN-&Q`CH7&INI)#V8OtGb)JdoTC4J zDPfCd7A7X99Xo8X|0#vDTz8B4JR34h4RWmy`8@oWh}5j>;eskY6Q#P9!Tb?CkyzyT zm-b>-oj)71hH!w@^gRr3^%^`9zWzNMKLSZi&F<8SNhFsABxocB%siis`+-skD09B4 zVE-R@x$6M_WWNfLo1l}GJUd|Ebhj2sF{xLTVfXG6S4zB3-(4SH!c-X>;hKBp!zR*# zqpqHw+rosV-sk;p4A-w8N<~y1P<}S=*;N^qb@A=LyxQs64r4LNn{%fJ#;PPWD+NOV z5{K&3egIt6gm5{o>$&uf!@aq=xue8KUe?t5Oz@?iLFWso&P0M>Zc^N`VcS7JHknO& z!RuUSx^{|)i0HMbwhj$RBD~py{mWX(WCSdQ+3)&=+!Y*QYw>}jl(kJFp(Y@K){VsmS{+ptz1QHCb0`wA7_hM%*?MjqD z&BEF93p)F?44Js6rJfXXx;u8f-tYQ}C8Ey%cidj=t_VToX|}mbk5$XC?v`}br9~eU z5U?tLbO>ut;uQ+T!kIUu%aN#=0PzS_C1CQ&{(79iuT<9O-kq&)Z)5e@*dki9T3o|W zh>{6YAgk5EXuCxc}kqW0}bQC?)LcN=Car9$<^wr!d3GP?HtiNCSB8>oUIg*#WWK)gfvGuO=-|Ph@9u8E0rC_3j1PnKn6R2j zG5TCk;oM*zdqT1=kD*p$?exHJ+|w+ue6$c9U#a;$c|(Qr**z~`JbCg271sZ~~mN?`e zd!v~F&JY$B23i6ZQr)#@&ofJpGmMWnitz9#lzd7b9{D3l1~}}YaE#S9H7#_O1p?!#hcm^g$Jba;7i`gjhBOLjr0NnITK(9@7`0$_ zCV6_~1M7(sCunOqJ~TAU7HyVP*E1DdwSLR!k6hDC_ zwI`e9+Oz&9b{$;@s*WMWQ2XD1{~eDt9BM9@bx1p?7URt$N~^U`b&m>4^s8~M3>BK4 
zs(NJMGk&(*#x!vsmO_zVgZ z(Um?wbsHH6fvQbPyr5F&fSh0hd$8#G*dc5NWd>XB2A? z=Eunt;6KxPd$W3&Jz(=A(&f5jJ%JDEcB`LBx-Ft1rL5LQ?F*<+rqE~S1hEnkT ze5j=J^m!8XDNm|J;f%CUZGhBsR-Ima3N8{w@cMUAdHIibzR~=~&9Qel531(rs)bs8 zgH3s8ty9ElM&i~#{obYjJ{QtnXq0N$!3?~8yRR%rE`?%0(!O%_Mp^k?lj2{K_2#hi z38zd1vCCBJo&btd4!I)>iISG2V~3n)H~3wpselo2z0mH(|K+?8Q%Kc@oqXyxtI@WS z5;S;e@so@;Mfd3COP5q_=ch*iJSdM-c2T=Z{UcEg4p~REB7@7RrX%-wvr0!Y!CuAFr%&4se%DGR%LQ$m zKTwm8vxR={byn8q<&M>&XBRcYtbP<=gyJPqR#ZeFO_R8W4ew)=!YREQ@x=Nzu@moi z8tiJW88+nM|GC;`JeIyl7VeiI8YBK>+}_uB40)RX3glMFvPXy1P_b@yma4=kgy5)0Su%d^lTDfC zMY2LVP^Vd3K!*fOKy%NSvY$u$Mbf*BTqGbxIhKUvgr3>Ha@riNIE7CLE2C1G?l}Il zZD0VH#2Pq-fXP=I*sTz16u#F$-5KVM8aA=j>DQaY2aZJfITV`J3;b@i6Wj1B>?2{` z4?R3QDDste)-jAXx=uCO2{ceX5uMlaSH(*FDazBs;{zBH|D)HG^bY%>NIuxkZHxX0 zweQ5%J{-}h!KOyES6H!o^vtek5oSiaYBOpy6ED2RS&CX%VQd_%Kca*F$d^?znNllf zZ=dbu^$}H=PdD2VjUsp@{N=02`+=N_0rkq$*lLrR zOa&2LjDkjf6j>g*d0~LK^bZy_H$1m;sLKxouH`ox3*NDq}B zd-D8$tb#7k_C6;Dl*#nWObr|3fA;KI1V`Y8BZ5S#R`3~3m(JOBPiG>^4of0kVdJ_z&&7slp@nvQw zZCgiH3UyKYv45}9+;ArPTGx^HI|pi|rR601w(b_UK3|~JZ_}QIrBaVqmCE>HgueIO zpJSLNgFnS@SiAND)wQ7f?{py3;m$G!QutMzH&7#my>~TGWe}KKnVA!EWj7BUclEk| z-_qLp>2U`uY`Ho$AP^)I55fi|5l@J&Le|Q-^<#V1zBBiJ%A7(m%K*at^(o97GkXlG zf+wdd>+0-qKqy)dMs(0jk@rvrYtHn2O(LZ*IE7Cu7K+ZAvi&(@0)GJv$%Y38)`He~ zF0dj=+J^~{EUs5@Y-&o`Dvt{0ZE>Q`pt`!6;x#^ey(8<@t5>K1nn>tf> zoxgGc01^a>8ih5Ll0)#gnYRbC?KE1zx9z-C{ zP=ZnhVgihg{L<=Y-8-oOPtiwQ<*hgj(Vc5&n$_bpXe`OyUQN%b^fb;feAk~f8$P%E za3C-aX{AOH4~MP=kg=co-hxOHF4dItFWw6*SsIEH1j_ z+9l7RAR5w_dI|&E9}PpFpev%Tqj%%_=Q9Y*xAAJRi)ZFJ6~RkI&b|cylYulh?=g2F zrd98~#p~Cvy>{@b`ecj(w08v?phMTT8L@it;ssDV$`KYu&f)F9t4K0Tm7ian!P3R{ z*1UPzp2c3}8kiv{AfQT%=Kj;Gwgw?-o#X{cL6 zJ$TG-pr3A}LSq{!U<9WoLkf1qGi2iTAe|(XFezedkXT}RRRbrd2c}0lX1&g!be#qd z3%oMU8gjK}q!BSQ5tszD7<-sEr2}bqX?{!@Bp@l4$tTeVp``KOPr{?0pN+CXJP9(v z7;Vm|AnB{M$tzq&%LXyy@|358c7r; zQ&!U~(>gvq3QGK94H{GPNn-i@F_?A{2$4yxQy?DHMbKTwLY#^)tZ+u7^mJ zM<&MYBVh{F_qVXA+i8Oo1FaiNag_p8U$J6E@0rUMysl~K>1Oq*(F93`O~*(1Z!E~M 
zWy=<@N9T>*zXF*YKXHO&!&F>S0v5{}jRyEk4OE;j@wI6GNisdXI8S+YL)W1im?Tzj zJ<4Dgc@c`(U}IK_l-CYQw8r|8MzGW4YuK^vNSisWtrvU3dJpO=D0uUnm6Vk1 zc@D%$NE_zq3gWH|08lh)TdYxCa(HCq70~{@`-M`!8!n29ik9kTpbAEN7Gz8hfNO(# zLwu65VEz`snEw*N_Hu|=G=EW!I@_>v*OskYQ=#x6)Vj-qgA@JF!0}BR<%%Bm@?^Z`%@YMtqKH z@x&=|Xix8j|} z@DDHr6v-1HrLl>cap(8RO==W6&n(PMdEC24xfGRUx1D;|!=oxxNM7h`d4!}JC>0(a z9;7ODnUVbbe9#HwkeKk7366|%>)Ez;kxN&u)KW;?pt(d|5~UZ_fDlQrHE2%|k;y<< z_?bs1(=0&B?>Q9L)K}`t^t;^VIU(IXIm~U1u3rUt2fr6C=_Ua>)h75!LPA2~9YRp7 zxCH`IE&;q%LH~<;`0xxHcJQ1JH$ekQZhdnLNnVvUGCw~Lu3oycRGnT7koI_aP#M_EE=a1{7&#md#P4%IKEY<4rB6LSS~9C#x^FE6uK z-25E27wt3>iGhpoLPE=(iU&|uKF-E46?3b+JPTSH;lOru9?I}{3P)ebzpFOjoYsJs z#V+?&Edft5q8xdVy3=~K6Tr@=ZX-xUEjOe`z7GDeYs0N?6~lzFUWF#Vwd!3A1WywW z1|F&BDphp-4Z-okX0)l^M;u`m$@nhDS7;ZTW6_1<@Y`< zGZd}S|K7fh<)XOuxY^*4eR|BR0}A57wnH`OR6q^MZYHI)M1dDYO4aizHZ8UQ&U2dgg&yLr>II||Clbn_i?C-##NInR1%Bj8`5F~vuJzsS`({;gd z;jkzkxzyXFIX@?H7p3x}3&?HCZx_Tbs>T7*S}?X4^=LMFi5A4ChxGmxE0XUxVRG@} zMIy0HFlQn>%7eHQ{6Fj41(~$&Yj1D={{1@;VN|GbM-j46e6Qvf7M6T)!kZD#bRc+R>MgB0cg^tJ;nR7u`T za*+KlUTh`M27p@FQ#62VKA!&jSXm=bdlu00BM)MStluea8SAc`d%K1;-MrBjHE+j` z9RhA(NTQq=XTfM8-|j04KjD-)Pt!|$EG^(+C;=vF7$~JC;v=>j$q(x_IXS7CycAV{ z7z1k{FCy>~QCoA4#%K=`9JP?lMSetf#Rg3-<0_#k$eHOz{K&O^p>`Gu$oLk*@$^Y_h^ZUt2=Yq=3MDb*Tt_+bjgk zp!eoiuCVpx@Ho;W#IRo%vMn#Ya~5@5bUuIH5BmxnJzggrvV?O@;gfz4+eMW-G_g$76P8YD47-w9gefA5LT$6i!I4AF!Cv)N?>E;6W-- zfNG-TpjH#Iwz}O23Mhz7w>7I*Q=0!g;q3CK+JoaqoGGHh8K=C&{-Uk-xqtus!f)%E zyEWmrIea2Fflw9__h$$uz;lC+st9rR@b{DLEg;-83d)w2!w^e>9`#}r7`G=zAA}$Z zlf?|Sj9)Kz+duz2pZ(*AvmltZH{&ph9Cup;A=?4Qavl%)!JDxbZ;vd0KszKhJF(-TKBxs0<5V;?>Y4zfMmlGB*GT zgagvf-w!;j@rUXn{>A@$_uTK7qx=8FsOP@1ysoaUzP^4Hox4a8jV1&zH}I>~Iw=kL zdhZoaFE4OL`mk_-8AjK5d>elAVyWN0Q9JBEB&CPGz8c5&EkC8-5q#v*3zsinhJvIH zsR^ahq8SYq#GNlnYnOkAyb(B0;PKC&KS9&OPouNIL#ZbJMYmTkez&&??h)L1S8C{xd zSps8WxY|p4Q6ujB3vl?U-U=>OcNC-$s})zOw?QmMf0Szd#pGIhXC8kGlY?56?3SY! 
zYCE~rUSEDc)$F9~B76$nHGwzyTur~dY6C3)?~kx!VLAzCp*BVls>iF;RPn2+O7La@ z=MhcZ%qWM~&MNJ+t8a5{HlSu+%uN9b1>J9j9{@#E8H_{81*z%jy5Iy+OALkJbJN() zDXVcbNUsuVtE7yGOe=pbJXH zp#qKy9S`Ls&lBb%WT0|^fQ(Pu>e6dtmBAV10#E0DY$dV=zDnKk2Qts>hz{uFq{Kw^ zyPi(-(`7!)n*9S+tJbt^*xo6ocx(kJ~oX9BB%iy6IK=QY4l0RY{(dpBM@a61dj{P2sW zX!+Rir%DTRDGSu4TzpG2F4SFIsH1)TI@^%pwP#P&m(Gw>Kx(AXW1^yW&?M~-+zz$y zpMNaTQCV16kjUkoUPJx;R%6{sYc?nTdpl)mZs^Kw;$a`2IKdB`^7ZRvGvJINLu}0F zM`GgQP|9dgBA+rdGXZSbe-To9i}n+d`NVX7vi0rKdkbIR*vpsq%+IA1QI{G*x&UzA7PsB>U!~O?#2oFPQg8m)I1;8pBout@y$zzv?=^mX z%4@pPehZbc58rqF_6z#G#UDy9+;8!olbXMpkM9%s?)_&Yi5(^L6kC)m>~}q?kMFbi zF8!NnBwG^4ZAZIEhbvx{zg^^t3%;}crnUq`iKX5!5_W@%SKT=h_PP+?xBMmo8Cgb{ z-M!HnXB*&E@jw@d@ArQh58XHVPSc+(IX!>%J6^Q|50pr}I#DL^b#gVh;k{S(xgH(F ztB&A-zT*24EZYBm-2dI({|)BsB4IXxT+`!Jy#Uv}-$&E1ia#SMu}o3psG&pcAK-?uj^&E#w2H zE8o4_51>E`Nd)KYW941^-S%-Y`O+EP2UF1;iW#m4DSTB@+e7&JM27#RQc|@eEV9 zZ0jDf%!dx}+*4zD6sMV4eK~uJ>FPNB#YIh&QoODdKmW~A`(m_*cpviWLn&)FZ7Kvq z9}y8TYgYd9<;y*iu4bmDP7{>7X=yaCopk|;4>(5xhEly}iaca2S|rlU8+~EYaf3Vy zPDld}{4_iq(3&~;7!`tf@y_zrSMavxC;Y+PKJ19Pt-Y>J*bsImcUS0dG&>_|vKVfWEWP4?9(f0gdgbr#S# zU1D2&e@m(nnXzZRO$KKtoGJYP8epE_Z-@e0-d`X2KXF|Dw@7G3`sPGr}rQf zs#&vCGA)2(|3|6EzP{Qs3W45kkj)HVcJ}c@QP6}qAr!AY`WmEM?!XzSkz|G!T;4%E zOd))=3tvk&@?Us$5f5u4->{_}2Ri_dT0??|X z-2!0xZ#4KAE^$y-&;~{D`DTtja~}Ef!yZVLrWfZA?h9}(zNUD+QT%4Qiu+~^loU8vd>cp;FxfBvu}F9j%<0oV2yFO3lcyv9^9Rlt=lze&a@G^lxDQ zQ?FI&1Lx!O6VNW#{`&Rn0A=%?fy&@!c5dDJ@?1yR)l;FR8k&pcuhfl8_SWtJwX<_) z?Bt2%r`=nGj)HEUdFRe4JRMfKCD#T|DjQU?Ve{rC*rGsTs{Yav<5cMw?B*B_ z6O8VR{wb3a$z#$yBxoWUL&%3t0^)kCiamKY7jO&gn_0Z%TU15JH#Rm*Qi z7vqjp>z4~qy^FVq^#hu6ut@r7ywE~bJ{407;>>hgS_DuB-@qii=HhBH!@ib5GPO0e z=^L65IHJMg#XQ&=I3Nv13vq_g6x*UawR`vO!7x5IGZX)RztOPvFhzdgBryNqwtsrl_apMMJ-+p#H9%E%(=gcq#02_n_EYDzOFy}%t&WDac zzgs*VEEjcQQpddHo@i@pqX_QFO8k3y z*@o65+qcp`@bVfP86k)${15)Cc4E41R$W8tjf(Of3Fchr#afi|3 zS1ZA|pfji1kGvaxah^`VH!kkrgwjKd2;o%;%jQcdFLK0YI|MO*;q+zLUpBK!czb%P zM2}$cqq9=YLmY6^u|jteyEm@ 
zeZ0A9Zo->|Sks`W8-oucj7iZW)-Fo-yRz*#mfubG7!ZuR3QP=7!wSVB--2(lyfUX9SI^dWa(%q``@>vzMFbg(TJf>H)`N-)Ya?+D9Z zD*aF5VD8Ghe}64zoXE8q(9s@oYx04*fLl^1f8yz*<1Vvb;C%=k>kxHk$20)u)efWh zgPJO}l@2O_(AtcqII`LZh7(M;aFCezam?%Lrcf3^cTa(jieCOKblOCV$3UDt@OrkG zi$hJnH}e$MFFfbmL|+!h5BK!{-5T_r>gI+C7L-k9IKc`E3b=fBGBI&AMu1=k?KMV- zqb$l3(A?6!D(9n11lGb&1ZxAgb-W0vL>BrBroV~w#} z@b=fiUxUHG>(gIc=-hj9L92tZ1=XI5`c`C6g>wz=q!D5R2%i)13N@3}ooN9Re{zGKe#9uBM=+P!->_(g4$Er=O$`%lyi!$l6LT7>0?3Zx#t zrzht(0juzt`C$toH)CKfDM^{5yS4Rsu08knrY6Fq5v#OY@c}Wo22#4B2`tP9ALc2T zRyXfDOlSr{y*a<;nr`4gKYzZL^f#R`Mi$+$2QY>;)Yr%FKY`N)nlkt{aJ5r2)y*KN zB(UQLVs5a7YTz3Zal$F)+%N3YW1lvnqo7a$iy%}b7rpFg3y8OrNRcY+DuM~5ky??f zCra{UPpwSGistyz(o!tZUS0>{Y(7reu}WZV{i#ydUQpd-5+TMASyJsv9~}y8d)RS$ z@&+6TP|ohv(N>%+C;yOu$qH5rNHcGT3H9eER|vHd?0HPHF=nbXDU8EP z3z!T+p-92DLm}XXi5MnDbPJ?4dl!l^?PnBU<@ndil|(E;jeNvKD*yOV9_xyY$Aj1n zG)y2O$#Zmc{A|n-pFe*NUhu$><7b#C$a89lRHTr&8heu*F0 z-cIu#KR%C{2p}x=YK_Z~(J&XS8D1y{?fIP5j2_mMaQwl?gse9TCJNxI0hHUf9}^OOR?Qg3rwS;TFK8DWA+*h-Hynie{*HI6gw(#ec;?RtJ5F z1t`ki?>S>jU3z6Rr~pGP)TJ1==i^gT2HBP`n4&b$mt~pNLnnHfdw&XJy=&I2LAIV? 
zw2vh!zlK&4x)n5$hlNokvFPfkKZx*+cGn;hp0NJ;86m(o- zC-z{P=RCBq39ZO@SP?TaCc=DM5MW;loa}^TmkqB_Y1M7qx9C6s)i%wTof=Y}U%e{~ zZP7qS=qH>Ebi9ZVqGYNb>v#b6UZAoNTJ_g4e$C%j6K4TO+Zz_XXtFro9m!|;Wwy>6s&X>p2*u-bTjpkA$indDxm+~2EaPBytVe*E!bUkAS6GnN%(#;at3okXumKH zCgZ)!{~s#EsD8xC;ru4W*-(__>z3k}aZ}muegm~wK@XYK*}Nb>e`5;I7RU`a{Lh^8 zU?{sj{Y}Mq6rvEK03!z+jVyHf9(j3rK0btG8c_KI^#DSiO8rSfH@-I#2IN_B8_6oz32$FJc_>rw@x^u52`MdP|1mWOh`h+Ah{y`fI*aDz(Ca@hcwN@PYvEX!Lm-{@^G{7(3NW zlfD91kXF&9wRU%BqW2`!H*IaE=Fm~Z14dJfXcQVuP~^tAa|Z1bQ;Ku~kJLq&tj4op zI072~xFV{}jgClW7$LQ?tbzTi{zAO!39CeI9pN6iT-NW)a^1z#H8A9?U$6c8vedhU zJ!#~!QLGeZ?d5|aB3j^2+S)EsjkgiGF@Y#AB@_}k+mKAzr{1u@nu}@}&E*L+ff?Wn zOG|EOnNa)>bmeM?++lx9LKLK=qyz>ATH68-pl$ouf-QkM`g2KX-vYpA^+YdTj7Yy2 zR4M_}fwVM~4jyF`f+2DcF+h`%p8n`rc3z$b1c}KMK0(1RZ{M~7YvGWyV5T}#Hbw0N=8R37XxR6 zuIBdb+fiW{;Cn}vRt^=6hoKF1qz?nH3Xux^$diUQ^82TDhIl}YfI1llG=~0)7(Qfd zgicZRug6?eVKn?UC+|O3XzL z)-+8btfGb?loy;a^3wqe3me>T)GQ6?Oy0iNs9*o3RQTNF6TD}zPbL?QtH zhKrydc8)MHp-iCSroOWJ@hPeAf;I=c$vs4fDll}@_dsGG9hQK9a87sbv;#cA$QcIU z6&mNrm|?Jh#aNC{D6szEQQhtr6)d#y6w_XIjT++sacLm{!e~sP( z2R+#EQIurr*4}$@%;^&GK@QmfWwf1iF&MnOh9uypR|V#hPiHmwtW2>~7eX<$YjFW$ z5+bdqv$)GrmYthRFgG{^_hf{uEJf`Gb;U&eZ|`VwYvpJ|X2HVXP=8!;Gj$z{=J;|> zs5s+TVqwK40VICMU9nlvqFHyzDgx=O*I)tmt{lMnxZLrZ*j$*|U@MTK=QMYP0~s!O zniqzm0Ko7MNpC(wCYORV4~KIeeho&)Pk_J3?}q1WYB^Y8F`NUtqZf=%n|q{s^KR7P zs~;aHVHpM15>vo#HKh#=xXk4faHib86*c~w)*~H5FkaX)2RNj5pTHFmq+i^553^_?oA)mw(gP8^aAf~FXD&Pb@(FwA#rsHUc zkB=fK2tb#;3(A*@t;v6rYZ@}mO{b@$*}(%)=LV0_6ihK=rpjk351I<@_6YVtA@;Fe z@CI)6G%nyyXxp+jBaHBjG=LDqF#3~1aFb|EnpW+<7wqZe;DE`23x#v32gEH~-MJL| z6?Jtt(TS(}1Vnx(7o`!^dH?SCp#JC?Tv}lz`Up+1*nNTgiR>bTG&)n!@Wdy^h@ zZ9WsY7Y_xrSeF=*%jY$zkC0ujUDX2q=4FpX2lB}mgef?aoLH}%yEQ)f@UlTi*FbBd$?3M6`q{O8V2i?bKaiDdL}Nxr>D;V&jtbv zhlnJ_RQlHmo8CuEL0@pOM+)qBWqfQQ3B;We^Q4H0(nM1L)bgXpF{3N518Qv%KC!r$ zf}ol^k_Qi z-?a(zc}V@E(xK^iaqvty@S5Qz#L&yT?N|6wtDImJuacNk$neyRlJ&<^p__*)u>xVr zEI=+L$|8C@a7Ci~+-j<9^m}?mdTx))OzFphs1&Gz&6v1U0g1s4vuNw=TojCG$Ogpa zCr0c!?E|R47Oa)nZNuB5n_m<`+Uc#-6i8d`RttU(kx8L2uE 
z2_6ZWn3$d+S_$@BhB#HQQzSTE!{jw5w*&sGd=OW(glX)u5rl`lsWiRxNvZC+wSO^m z-&SY^5F#m002h{B#uXjU5;xXd)B7MU;1sQC;-Z*{QQK5$lmq#RRqi=~=6WYjzJUZo zwqvsZCkTX#IO`MMotmQ-?HH8WTO@sU5Ar%Y(>f%_3@lN3x4bVb za^~PHK^x&N7vt-fE{)(`3M5W^xvQMZnkLN0aM{xqnh&7%T$)y%vU1uLwJT8jP%B#>%JHcbOS1wUIGjxwJ6kHhTyXf)=@x z8+i+fgfM^uNWi8FhfKgHq$cBQ*KqJ)-dC`IBL=?fBJ8~n<|MJZuo-59c7iQY}GGEY^Ma+Gc2lJ<}?v^V8RPCLe6sXAuk27n>QxU`+>|KK)EX$vt_aQ@v6oCT3 zJPxS09Lu(U75ZUm3>7pcDj)4+Ul0bW1$0A>;ze+$$q?aZ!J=ZqrVC{m_+XF7^{0cA z0Tea!553_U7#1{)LGN)1cT?en6c7MwO$c#sLr;BL?Bd3qHQZVUP0aM)y?y%vt^c~} z@QO0@^8}4u@?lu>YRJCSXXBWYaw`bpM4ET0=Y6=*LL%fj5G-(s|AaYCB#tLWmGdh+ zAc~ZK-*foln>DP?e(dLU;pkj+(Xs)HMHtZ0z?4$Bxo`KX%Xh!3*pX4l$$9%5gqC;O zYxdp_%87@Cen*~S-l?sT^f2u0eP^wk@8J?{-*OS4AOq^Kp~=w%q! zcH=l>9(?WEwVzMyl6>oSzXgWoQG6<5?C{>h!O#9V%Ogq|dqR9tnXvXB8yd82Oo*0w zU0qd=H?xQmjpGCC#&~qRC=ML%pE%z2!Q;-IJEht$+?pT||v z3SNjyN3@}*jQcfDzFnGh;2H*L@DlLjGBrP6N2kxZfB(Dsda?Ss@8w@r?Tn`(O$7{` z6b=I&1z+{|Z}e}D?F-vmf4@va*2VY7;NT)Ijv#IoICXn9F^#4EkmU|;dawh>hT@A( zUrw?}mJ_>G9 zmmNdsoS|1v09SyTOKEt6v$Pk0} zR@|C&vLXH60ZpQS-2|S?{DJcZu9hrA43>e{T|gIRF}`$#u9Z}FBOGlp3K1LgK`{S7 zmcGTE_M|B{*D?tP%}rYTutSfuONpr~&9tN~9|L=r6lQ9f@i5X6UdlyJb}!muViu8H zWsIVG1dIt`?W;3{+zWDlu(y{&X>muB+IKJdC`fqlVaz2K$bhq$2qhR7w}<5-ZJ@_z zJsU7cj7GhT>~?x$Y6?VYX0K8L+5u2>s2L zKA>H7ZJiZBaGSv$U=rhFg@{Y(4G3g+?Ssz_nigfHd);1UhWg;LLeBbyl>T^~3=@vL zQ4lyVzd^t}yvfS?fJgiO^48`YRVx5p;9g-h+B1^&e)$>l(v zuF@BAg&FV$+B$~8CW!ILp#0~+ef1VY4nVyQ7%qK{HmP)-<@#XTg2M3!Ey()(1sE&F zn0-J}k~x;;i+e`Z@i|c42ye*NkODPn;liJQ)}ko#w~=5dkF`^_QOhdiu3UEnGlSWj zpW2=nuZnVEC$3U)dw&7y7dr8n-mRGPC*$c!c1)GTWrE8$Bdo@x_%!Z&+4?N^z{-H= zi@PX{B^GGlV=3}Jfq`n61yMhm1QZdM+X;o*e#1sPzEh`8VWtqrf!-IyDvCVIr2uBx?D*tthsO%8DycrDHAd@hrkLfv(xk5=$KvU)JIG<9PV!2~@enI+F46`WWzs`Z-qbadY0JH3CcF7K;%Ec8K9~ zwDYfCzlJW`dA}MxD6)No%Va%<14zb~YkbURtv@_o&V2Jz)4YcdTViLpSU#HoZ&Ru~ z*R5R}hl?{!o@W57iA=GDtGu^>Ujoe-mZca6Qy)DVB7};ZCIoVliTkitjbtTf-<(}UqeU91#X3|ty^4l4y*1_yx}ZiNEU zw?~IdQG!?B5<(M6ra_(y8hQ>&Gr>K{uI%}Nw{-&6Jlu%h_Za4gi)f=Z?IQl{e7JDP 
zdi_zqX?pi981S&Mvy-$RZUs#P)sk2$GeU3!5KVn3KQr=}?xI=eMV(J-ow_Bc=%uZ4 zRjDq4D@SoxP}z$)5OdJYjrl&z0I_AhdUalE`NVn;P(N5e;0a#zSwVOaOlVxB;Qb4I zA$OjZ&%%q4JAQTJh>pGeG_FlV=KYQa6LsyQoYOm!hA}Z$hk{gl*mVOvz2Yd(Ysf6| zx#*G^k)AyLgYzfi{0apZ{SubHFFmngyqsuVM0nRwL z>ERc#vDdVHBbL+s)mBN+EcfEw3jBXU*+B-m4a4%T3V?1gzLmSrcn?BUBn%8 z?SG7f4W2E4HZU9bG!d^%_za$CKuA#U`Pd<%^*1qByxX`ybsIYZg;U-cATd{?JqROt zk>pPakNF?M-Di%TFJAoIW=I7Dx~GrWbD(dyuh;?^1{a?~OL)RqA5-?}^rR!gBd{p~ zdNtZi6fGRVT&Tt=xkZ1u+wpO6_2LAff$+brL)xai=?gA1LY2@#3n8YV9_@(%dW}s@ z#2tWW;>#z`2bCP&3I9^nEMSbwC|*FWBN%c(O{_FMLkfB#H(13m7}+6$ntj0uf6q84Hk9t~X{@T9RPn627ZY&( z9XX8;3EqBp>V{2_bbw1QTzJCu;6Ppuw5IZXx6DS+ibtMf2pmXjOR1r|xRu;@KxPQA zyZ7d0?7RQ%PJ~oHQH}kSl~_9ur-GHTO1H4@lV8qSHypdB2VnFK0#r*n zjNbXdzT)r!(2Q6j99=5hWE-nAU{1UW7RbpT##N4zUkrgH-U@2n<>#ZX^+MC$=aHare{X(Wonq817>-_HHRA zWGESJjJdch&SPSAB|Zf>QG>A~_QvCzIz$-xsIhd1EmC8h;ND6|=tB!aQn?HQIB?(+ zdTdOrsQd4!Ka0l0-qDdsB8P#_J7n>OD+FXq=v<)TKswo?hjn#zr3mWdswh-h$?4YH z&~s{V(N<8eKifP?_o~&aV@CcDUuPbdbH4un+t|jS#x5n1%9f=lNy->Yku6b{Y%L-p zv`EX$*tbd>D$F26mO`l1$kL7yl1jRhl2R$H`aR!e&i9=2{XM?tpYwQ}Gt+(FpU?Y! zU9anPy{^|ax%D@BkG4>E7n5il#O*Gg!l)LRhUoB%cb-3%pCB<1pSnj`cX~R;-d6HElhA1mz-9!mfQdVYHH>eY8 z7uc{wG89i;TDRQ4^jU87!t_Fh0&j2YFOFE=i6!vZG;4872^(*)flh8q!Kq=CP0uI> zv$}?(2#)>J7JC z(%`or!gLRZlKk!8{r0=PrhDcoth=tp)mBw4+%EB6Fnz|10T%UtR`Wk4zldO>D#B@Y zZMH*1QlF&qdF|r2b?n#?)tE8!isb$z#nDU6%sw1Z0~^^okLcZ_MioGpw3=YJS;m8;r>O=Z~V<`^8qK;%3d ztnilnDLN99B%Z%PTP5GSNbd)5sP)D+S*D2DPvI~3Bt-Y|nepw8`M!E(rnAR`Fu{y? 
z#Nz;Z1_D?L@(E$Q>DgjRIA$yTw<*aR_UqTLjS6($XnQ!HOuh(Gw@=4)(D-%eA!;B< zFWp<`hPxOt^CjC`{W{1ors_8-+G!u1E?u}4zLQjPAz=6;dnU&nPF}v7*?=8ihJ)HX zSy?X{+7s+v&7Y8|Q6jL)Mj;*54QJq5*qmTT#8HLS?9j0z@|ZkpX6VB_^r&e+5p}MV zN+g}g{tjF*aEWSTSrly~F?ILr{+{$%13$%#I2>rHmeR*Tl9~i#I1lhHP3)oLgto#m z1^Wp@o!8I|F$JF{NxjWpClB{mI1xG^{d#3 zn@i!818{528nf~CcB*F6z$;uYRkaKJ$pyH*KpBbrpA{RNT1+K?AYLj=2h>dt1ncNSb0y)nz}S%JPB%Blf`g6VQ z^ZGV}3i3BLIdbZoZ39ahuu+8OT69y<+=CKs+O%m|HWJTv2rotB^4Y=j=g$}1`U=Dp z&;4+CnTl28Rpi~}O1LxKx?!6QND)TYvcI%V~cN(Xk7ixFzyqA(n_3Qw8R^^2I2(OyC(WG}V#c@~h0GMd3 zdcVERohwGTZ$*l}8aF3l^n#upX^Wei+qpl?)O8-xT#7|zjAms+R+#_L>6wN)?!7cF ziwRU=6!TQn%2cqk22o@E-E=3tk(4Und$VCV{`kkL!W(X>@`>OJmk&o~A>MUPSci8B!kIC|pI&mXO5 z6J2O=j-(ryis?w!xl81U{k+HCBNolN`7#;aiPQ@7@+I5m)OIcv@CFbk8a2#WV-R!U zRBTDyyb12Uk?#l+6wZ&srmhhhz@9Tp5`X0O{_6HrrmoIXvfzsf;acQf zhz716h2s}=My~X;aD^!`3;T&Cs&eREts`~D!BgBO!Tp78`1IEDm*fEToYPa=o9`5= z;W0jQ77@9u4ll*!8e9`DXWFs{FR|FMgWw6G9)SAu+RoSp}?Fqwje<)l{*7-en zOaJ#a3q`)xIWLMPUX1BQ#`jm{wk&6Od~mkM%C{b^-T}w1FM1aRRa3e$bn)%Y1!SWc zcqNp!lyb2728;EwW@sQk6}NQYBYRAopkYuh$CXZB!&!U!#I+}_{t8@tH(==05WELJ zl3xd+l%U2Goz^sqa=MSFf+abxUw=d0h`Ds9upNO4zWWGs2`QKBxMW9T6~w)d_!Xa* zSRC*ZV~lReS3)-nN9d5p_J9MyLOKq$YDTPWbN?NvNA_s5*IJ3e>m=)ywdeQ9d+6#$ zLt`geOmp-D{J|AQMn>B0@>0+l5>{8DmeN0@)@toQzN zLMEAZH<&%QDWiYfM$92-P=y_zkqK0zk8|$;;p7lHIm7 z8@`}I1*)E#l$AGT(cGh%{o}BUS~MXOJH{Yr#yLKzE?INV_>{c9w)&5-*<$zrp2J-9 zTrWLd=nop;ekU1yV>AknYWei7OuXeL6!gTs%nbW8T!``1WJ+Otkz9Xy`9|b)#1Ub% zd(&mVXwqr;nrgDhr>d%I5{e#xj@6)z{jU+WIr8D;SHkT*kCuBR*nnJZ$+5jdmrbt! 
zP+W{$^RA#0vNseWh?J&Im$523o%>nA{TGdE>DY^%w+r3nO~P*LPu(NN*DpPn$Bfp5&F4Xr{#@+y(kQQ9@TB6GzSG`8oSae zL`P{~cdhcqzC(xnwp^fj`X%|Z!zCaS!%f$4k%UhZUakDSA;xIg7=BYv@OQDGbxcO* zn@>0#mv3mpZx9xR%quXal+UaCURKkxt^tud(3EPPDdaN-hxaZ0W|WB6*mFH zO=dh={OArpihhPsin0R4pde}l8Ja+V630S$#-aIeowhzYW4iB7J%CJ=_&+=A+>EcI z4xpm~5E;L>6oKJeN;k~4E{*Cq*q+&=(HnMuF@Nt!3zRcT?jLZxC9Gd30E!2jhUjwr+lHlh z&s(={U55ID7dC=+4;O4jsR?h6mq{QjAbs$~>C;+l7h=S65`kq0d=d)E$}X2?h(`aS zs!+FImy@JDss4n?72T*#d*YCUP3WQg{D!wy}Q+7 zGjsDhNW=&OchuU0E7$X*#LxzpH+d-?Z963V=pcB|E-=F|RhK%AQOk6o2e4~`QQ}$R z$F<1F%$4#88$xRHS6dlS2+Bhl!!w`-i$qm)1MmiTpxJiDPw*m(<+FxM-m3r9y}N(& z>p@HJdYm!*Mdp;{`7w5+yBP1Q2;U&}g`DMg6f$moAJ=WF44O{##DV;ou*Wh6h}Unxt`AdyAu=@jZ0!0mg(f~tkwi0<%UynXG~#f|>`A_rOVWOsc1}%WJC9dm{_*OtB9D+h zcgHbp5?Eh>td2IQ+3dW;rohN%xQ?L*-&$EN{nGF7msigD{Tusz=hH zj*v`u2X&avK|r-Q#uQO=#d`M1wly3QPT%{FA1^WT@LG3X z;+|(YIl8PO>Qi$4=FNd9S7`ZB11&Q3dnJT}=&7g=6COK?+!epfpebPLMv$QJWGx7q zkqO_&SdrLW7hkmT67-Qpa8*}{6 zZT2=cFC%T{Z7dJPe*18aMLWsofF#8`P!j|)RI7`o@8Go*gKYT}672aRIpAD~x3@Q< zD?xi;=fcnxJ>f*d`8;QgE=LZ$l;O;a2OA(TB9PDe{Uv!iQ(13tXM; zoJ)`G3f2=m>D*t77Mb*m*-s-!#OCe1puEx9x@wK{#^RO*;`7t1p-K^ct336qpM-%` z_y)3uR3Z6uTz@ct$aJ7xv5-PAF|BR3){kV24mF1j>Dq`DKKzpeyR=jKE1l%~GHf8L zK6B<{9_m$oG;$^3P~@Y$SIfx+_UgX5ibV40J^!+BW&!FT{Zo#1H62hvrG$g!!GgUFxi-8#XL4D=UlcSV6ns2A=up#wUqHB;Y9bo6sY=frL7wfzk)Yt+0IT z)CczO5y||Lo=tTw^?SlR0&HZ}lJw@Y4?dSM=XgvnVfJ|Gx&itH+q?H7Za*2Om4o|H^7(bo42DV)$+o$!ZlWF3U37?5 zSq}8MU>ft2dTkf6DPhJ`$~|8p95Nbb}ZK!j!xf4&(YEG z8}sDj2c&rqp0>z#4Xu_KiY;gxNLAAwlD9XLW|-cJhun@hOcGvru9CrbF)RY-1@q`Z z%br|6KCp@wF%2MtJ|amu&un%4_}o&WxZJXByBkC{jsVQ89S5m^sW4BH&s)l@?Iep^ zE8rCL;8Q?P7#?5*I2}?wEiIghYbLv~T_9S+>@k2;BmR1PP^;~Z=dYZsg!}iwyFCUB z2s`kFi}#W*cIMgm_0NZK5dW(8%u<3^Tb}6ULM} zR;_WfGWE;B!-s3oCNiQb;AL_8KsuL`E|^{ai^)j@YX~GqWFf?egs&nwpr{o7EzQF?s*+OwI6J5kJVEnQ9x_` z^L4xvqAGz1W(MZrIYk7lgPfLZ*REhzP{fz&YLixEY{78atm#ZaFe8HGh`_Z;K8i$= z8|TqNO}FRkU+pAs!x`e{oedQ7Jg`l|pv|0&$^20A5VY{2Xp3_w+4g-JWIk6wo9fPQDmgVM36JTLjgXmX;Bw{8ocbyuq{gpFP_`6hxktfBMxDO_Hs&nON 
zhbF?EP}yhi5ZCN?p7BFuf!nVy#yNMJ{h^gy(&ya3RhArEq9M7D6z{mGO zun7(}#>g<($7jJ&O*8H#;l1QhB=za*Ei;E8hZz7v1V~*fFIjcja-=#OF49tUTH!xk zGQGrhG{xNXaxTM_Wl>kcGcCL6F`|?v00EUQqF9zvTERQ4`-*aoc^;)N$L8s~n z`Ojb}b*NvW7MEm?*AZXM5PHk~hRuJAN3BuqfOn@)pC*vwA38Ls)zYz|L#4d;Pw%v= zm5QrL+ZkztBnEr@4J#V%IQ_*7mmk?ruM(o(p`+z2PFn5Hx}_ zD}d1_3>@gjy-5SL8yh0JR54tY%GS=%{|?lK}GJYm$K{%lSvWZdxvt zo(UUktnrPGug|Cu(Y^0;T*G`w#B%AuGgHnFD%r}w>NZzl#Z8i7*IoT_i;;IO+X8q6 zow4cmUz}jdjwSp+g8`T_b66e8T(lrs%`Xo3>)o40z$0?CUSwn}1V#S%<130Xx>7ub zm5%jdwUBrDzI;#4!|}arx=H2tK{E z&{xG*>EMie6HB*F@;Nt3_X}giqiF=vi9fEhH8fSbc{*|xYx4y11F-V}KUOVz&8+=L z8iG3sm=9q6rA&>aqU2WJ*qCyCF&vjw`_^_+STPgUchB1s7{&vAfF!ufcI!rl=qe-g zK2Lz>Su?+%!f8kAKGCd1Qshhl?t2H7(>0cq44U&oIhm+%!*W}4dk9(CY2)IlH*f)fL-S?@gg1J8HXNrMhnsEBiK_U^W`- z;t?8FAYv3xVkg<IBc^;*%2k07J%(o$ofw4&sQ=R9oa)*6W zx!efp=D9M8trA7=PXG4ukyYp*hX#0;Hw^tWVM%B*)&Y!yQmxQe^!pn2e0)-bw;_tx zz}>qa0%?eNe~p;qy=mzKq&v(mvyFuLpRm$7hvz)xMR8j5=t$Hz2tcc$UV6zY!t&G6 z2^;xoSmMqJVI-Lvgb8ZeJd1piev*-)nL|AVRxN+}0zNbvl`C864B=93ZRZGZQm z3*gMj_NGgdwrDbD;91rhB3I>A2Juk9=RyoxVS3RvWBOtuLcloz=&JYSbHvv&#iCfb zj4)F4ajjxy(5N0OnnI(y{CSZI;5EO18UJLCD%q#9ttfauZf`>+UW>& zsAW?s^N6&q#nx$Dh0p$8mp1v>mZtZmr44k4Xh<2d|-%6Npa7;lbvcJ`;@A)6|>5*@g|huuvU|&)5}Atx!#ZZ2}_qlTa>ZI5Seu zn-@z-?DD{e=Ztz)w|daY{`lkAKjeP}{ z;*)5eh0_!;Oz`&S_Okvr-+)pdio3sK3ZwWr5DPE}C-58P zS$ue>Eh{VgKi^^YamKDmpQ5CNQdV`*zzkG!>=L7d@$m-PEy;I|6>KfsrC{xcd4bk9 zkdevs{Yif&3WXl4JE{2Z>+qSHT)DP);MC;EdFG+v*~3c_pP7oMeSi$6qp$)?RR_eC zxc}x~kwT#*{KYRWUiwZu+l#iA(XxccGFU(FUhUp@z<|57fEZo1dv$GU9~CX;ooMEa z$DfPywP(r8`i6$&yaGa*!|PurLIAFE_@1LWYaYqRY=wfs-pXn_+IBok{|xH;cb5I5 z{hWs5>zV?7zwy96;ujj3X7`py45QMEUyk)GdY>@a{j^2zkiEhAe$!Sa)O{jkidujv zAW~wR;S>r9-g(KLhHm_B<$wKdwe4RYG#lQnf86ffyM@a=E?9(B z|L&*ZVyi6=1)Mu)byUz=bC%LBpezbbQ)$fa6X8^c;tS26Jv~zAnjnRZ!uq9l6#sT> z9n8m$9q>I&{^9aiXer2MugDg2wBJ7Su`0M9nSaRZ`&HAlzWB)gZt)|Xi4!T35t^>_ zFU6%&EGIoD=z`k~-6a|_*=l#Aqf@p7DqQKmsRafgmmz{|T?`|Z-d%KVhjfnE4S=vF zyue4^wHJUZ3aheKz?C?$GRSb=kwY%r$xl_}d0)B<-A*&I}QCPBP_Oww= 
z;-uwgW^^yvN=1fzz12ME8h$Q1i{_)tl`{Z!(laHMXT#81dO1pmUEtS>Q5?QzLF`D1 zJ|qZ5>n4vK>#c_#E#Bkb&m7>HZbI9c!z4owSA?wN1CE1smEizWEi~O{`(IAruN|G@ zC8w#{>bm2r+`(KA<)|1GVO|(6G*LqBjgy;H>SyP!T6_mZYBHIe{NBA{MF~v-5FCP@ zEWv}yE6;v%F6zyLp74Af|NR(L41kd?d>%We@~&@-lkdit)7fp-!c939Tu9DA|?xrd_}Nl>_zP}{Vk zarNkv6!3%t)mE%;Aj+23=<89F!-?c$5Aao()Vke}_9KqG>WoD~dJV}wH>hu45mq1# zSi)Tf!wn+9J_4BxZ>}hMRj2$7qAzZt%riSuc{JB8F}q0MZ$@53{uy^?#NlEVopYgR z{<0&ombchi)$RZ1kdOhAm`RcAlY8Sw-q5cEf_?cJOCThu_kfFBuL9AQC^Qti)L`h| zgc{2q|HL2hE}(i@4iiZ(bJ(`pxnhmZkrPwg?v_0_3K#`nBa*kwK;e-Ic_i8qL%0-& z@bquJ6k{7vFbjY6d2Qio38^m$D!@Q_z7?i0L)`3zup!wj4 z9~f+2bn3(V!a}aeNz$V+FP(7(|5D}vf;U|aEK8qWJB8@u@KZIRkr{HKSX<@zPw>S9 z+fthQ=8a`Z%J(0>k01sBPkXkJq<}6UGFknNC9eML6wcxg!j1-Jqd9R6@5+*L= z@O(Tc>%Qhg_JMTiCyxa|Ky|>jO5-CiPI~EY;dxLt(3sC*ljO zPC6~K|LbLFD)P;%ti(V$ME6m*8(yO>V-_Hq31TR0M22r^Y>GtkUR^^Y%J=k3%`Qd&g~ER1Afu&6c8!MUcZj2gv@u@Svk5i898Ig9Y%Q)lD>i0Tq4 z<>W{kYpw5A2F)c_MMXJLi8LsT89h23TR(P7`0wA5{o7ud+uSF>dx7%Ok-+%~lR2+hTzY24v z<3&eNA(XcO-9SCLxsLZ>YM=6z>o0XH3^t|D|6X5A0A|G6dL}SUP_k8^T;r;?Ou0 z0(2?DW_jH>`cSMVDGN^`x%Gw}dTh^;q#ap8f4ePY)9OmhJ5O?glLd)+d;eTAk9fhI zkvMDqa{*}CDJfU1xwjDhCLZa#ONxi{_X;&`a) zaPAoh!+&l6^0{3kv$|&gi8Q?vTG&pV_UzpYGvi>f6FkNEh=Jp6f+p+I7%#lbq>sbC z(dQQo-3;Uu^CJuS9K}>!ci(eddOI+H^i5tX=o8-b{OZUajAE6Pz-hD>T^c{(6lJx? zkDY#tS0$J(F*@4)6Td?)drRVPyUv|spl#&JTFcu=Y*O;hb1EZTH4f!Vhkd24!4kv! 
z%u0d!1aaF+40Mi+Yxo2#;fezBL{20L3%J!I&PoAzY712i%Tm<1?%0i7*i|HA#Ftyw zuhR)Pp+kB0x3Y;0ls(`3fSEEr?J$uC%eVgJZfF>?!9(JE)bA?@_b-Z}bL$sN?wN(R z{l8EyIGdlz;o7&$h9L^MK5fZ(3tMVZ!bUEeRiiV`yt5FBZJks5Hsc)p_oW?pTo@Qg z`X#lKSnMb0lPoB%e9Ow$qM@a<7=t|(MR_;-O~dy-K<6#Q1`v{IZ%=oHl(PMv_}VXt zJKjOgrC;R1{P#~z@#uH81{e8l?88#8Hcn*hd{X=DztA!RWD8)>1yjvtjhAb!tR^f@ zdGtt?YlR!+&Uy!!3oCp2hq`!R65bunn6Y&jn-I4LyfyV7blsd~4=~G*b=3y%JT*S} z^r8Kmof+TX@du7h^4CQ&_V`A1KXs2XEf+{Z+&j{L+%JxOTR&HM@4~@L_hbi6WEhL| zPgTmoqs`fR*zr&_rIH7T%0+L@`#e`b@bA$^K{-ejGzY>kx)9>%i6Kb80l=UVwC|CL3(`&RYATjT?`6QykTQ)z2$Y{;Lt ziKMk`U%Is_?2F26g?19}(KKxsWjQ5s8QaG5E-lpKL60=M_7?5$W~e-o!v>5QI`r3C z^N6n1H8sYyuXX>uo{03yCqIIv7ICR~xow$rTG+zwsg|-DYaZ<57wp7#FrtSDPfxp#>_ncyW z&is)*)`6kAIvSLcGmg}y9qSPn>n2jxiXT}!bm-9dInOM+YO$Q%yf0n&829G8BeHt2 zeuJUi>~+;B=YaKl3vkO@hs-^UjVN2Mk{}B zQEDfNpY;!d`~PID89WIwCKq=H?h%!PE6Z6I>QRvkeJSxeJUrZV!DRQR9V9Vmum_X3 z$&6LCwOTuCgBEpXluBl()N^un`z|^%WiXK7D|o=O2gsAr81A*!uh5+0K3B)qLw3@d z)P$LE)_r%wt9V?Kn@L%2^pdD+LWq6fOX7$f$U^Hg80u8TRL*mWe`%Z(3@b#_HVQpT zBJV=8)~@6>V&C16mwY1HQHs5hBQFv1btX-6*r-j54R_yG>cybRx~HXd1vl+~ax}kq zDQJ~W!&s)vg!9+m{v)yqKYS35%Vb6QM6rM26-$rZF&-`MNK!1P zivG@?AS0bszKbJ{ew^YUN6YXN{@%5fl|Gt&ma=YGH=tou**g+`EE2x>TqV5_ARtY% z49Z%aSt8>pb^j-Iwjl->p`DhrJ`O3cJw@4FbM2n`Ax~n!m`s+`Um0x&KSjUqILqz3Hz*H!;E?{=^d`{gB$KAAdHNy&?1Ow zkZEM($i;tA7r1cS_NFJpD@T^dkx|KBW`ccs%JmO0iCAHr zrsFjB2hp!vS{ixs*s)_YbLrr$M{5r-p8 z8Ky5p7`jc2==i-NtD^}Q_F0J~dse5aK8_MX7c_NT`tD;CM4S2ls6%En?b+6&Z(m2I zON0#X;zBDvv(2a)GcUVKX{&m5mn2T4c+){fK>G=6M)|y*3v1m3t)YB@60f1I4nJO` zYYr!=$JNn^X-@2hne(9k$ZFBYY{f|X|HK&Y%g~(Y96>4ylbz=uX~1)*zI+j);1|M~ z{L)_!&dLs9Q#)2@@gI`*uNKf~{(8`>2qEK7qVq(_L;d#Ro|`MIsz7Z!9h?;g4ooYT zm3*H2e`c~hUwYE#k&}}n%MKhEe&C(qeuZuVkH5H%L&k-S2Lvy*Wex}?uRzcrI{;C` zgrLqgx~GRH+}0`w5)gkV-bOgxL~^`-|eel8288w91%U#{r3*^)Nv3bS*1b0qsvNC%)%6U z>XCawLk?kq%Qccg`Zt(A-9qP$x)hu`h+=YsfBb4?v{-u7;zR!aIaVH5ob3EqwC*PrP*n99r-vEZtZJc#x$#8VfI zs?iib|Nf+p?GnXigE=jG@>N317ujb=)w?ArNaF3@FkQy62g*dWeXBu2NekxaA{eW0GcYXZ!q})rW(F8Av1I(e&W?R4`(J 
z)K8?Ce=%BZn2c>hqKRN?v;N;w)*psHcI)OB9;>iuXyceBCh`1kYSX1hE2ecQ|Cedg ztO7rkm#4?Iw3nprDU(CkHA3yY=-g)$n|TxC8qWInbCmj4`>hrJuIKLboId|eujT@S9)0?a9HTM)=MI*S zKDYQ+$G1HCJjvkX*7D-!9Z)_`?krEgQ>~z%*LBq-nLmyz{ED4GlUU0MTgfrHV*M#` z%MNZ#MND=25T^}j_=-M{#e;jwlzX3Tz(1q@XzDc4z&)~4RnYLnYY7BPY~A} zI;1CVD!ayviT!f4Ob+9r2k`u9@9`k_`QtGS4uIx_j*_uzRq}5o^=ShV?L(DD+AGJE z%)Iq`Q>ac=PMwV3D67iFJ68hGdI*|Ya6>>)K*@|n`~7D7#*`awHcHoh}rt+IWdp?;=jbgoRa4R?EcU7-8XwSS$CIe zn|S+2nSV)8OR~Q^IL?*_U^a(=T!5#Z~GT!-@e`~V- zDWXa%uQ_y3_D@wo?-T7ODaZYtq@tpdbR-$*h9k~T`ZLd+J&VL>s^Wl%Q>xM9*T0xM zW5x{j;o!*b*()(M)vwSGS0s4)Dpty`*P6F@@e61fA)P?;L3X&5?SVweTVYoNEXVPz z-jdU!yz?cU6YXCZxNCMacag6*){p2T)lT!t-!{X&efl(uj}SEGu{&y(Sh!4gOnrVi z1XP(VpMPs((1ep$+DUE?+pV9aGjikxygaVP)-EN&?vWjI#kwV&_w1!tYX+pz)X;eD z7jomq4PMjp=g;rk$F!IDjrxxlTbQ6$qr7aYd!|CYv5i#wZ4=M8=;Oy|*sdWhlhPsF zi+(dfa|Jr>UEe#Y8tHTnL5e(n}McEgI` zAc;&po|)%|52xRnW&K>E*MVde`8Cog8p=20cS-kn%YT7MrFOefSsqna=X^8OPCCP< zMmT!a8!wY;hxp`sR;O^HQ&TwxVL>^mYAjU6MYjd>o_w5i3Tl zi(5E{7dpQ8^Yx`T-zbfODoGk>s&`|~Y{jz(wZSL0wzlh&Qz2ILVKzVm7c;GE5$q%y zYXT6rzitvMf5~ozSu?%!J-Y^mgy5bMLw{ZLNiKz*xEV>Ms&>-(9aCFkMk}T&EPHv3 zij3rKu!g24^#Ceqv=LWAzilK5Yv(x^wwIl(RGc*8%i{vec{NmUVjDqGjrQ>2hG~8O zSh7qqQJ)j%=>~3JtQ+;mx}g&$OwiL4%@WSk_!quSi_Ly(s%7x;%F>%7Mvbx~4s&gM zugW`Y9QEMXn;W*iTOvG3y7t5O*i?$%;>H}U`lZKGUHeS0_1wLaW%7<|Rr9%gEJvQg ziVzS&MRu%Bar%n)1EU>XTq|Dd5h_5Ui8GQyFCHGRcIUdZlb9qyDSg=YQA|nViHe-4 zc_+Ie!5t%eO7PpxuJ05xkEZ!Nt^mdtrK>tcPwOl@aN^r2eXpITw5t-Ae@hJ1R@PtL ztx>M9XL?#i&X|O9jb@oL?ME%ATHYjWt!~lxZT#cm^3c=@-D^*r;wmgwp3-x*;{3hB zw8G*~oF|y>jiuXpGsL>EuuysA7^gdkF2qjvxdWcB*?M$IG0ukBaS{%4Pj*L_x|ZK5 zzWect7|b*@ToCEP-Yx)TIlKb#q@M+q)z+i!Kf%V*1G<^mZ>G+#VGz${H(3oQBt8VB zg$s|CT~A55vi!%Z;gYu^*v3zZOGztnpqs#J?A)#Y)5s-HM(0kd`H+?N;EauS zc8BbrvlX(3-E?{H=dm{F7L&r32wfjB>&f^$E&9VzudqiKlSchDtqRl3)18X#6)C)swN&U&}IMx4xX4>$+{w_;uIbWLjoEpK&5! 
zOJ+A_w?njaT+RwT5Yb?)gH0u6O(kL$ZPI>_mGwY&~;4V|G zBZXc1{auWMT-?6*WPZ03Pq!yNDE~vvQ8U*&OL6R#t&j8an_u1gga(`5kV;VKt;}G{ zX72ai?((Xg$D;9dSbMRfwekkVK7EP`3M#QuNl7@5vVbNoz3bac?S6@W)q0$!6VVt;@x4pSgL+=S9BsOwVm-%80dpyvfa@B(EZHQ?q-C=IxJ9uQywM zQG8`?efO)rc6NBnlNF8IKj_`gjDM!L_t3}coWydy&W?Ub<#Ek#*2X`{?Y!;7xX;D; zGqw-Zy8h&8msnSsCKadmZ!Ep04!kQ{_SDJw{NIJH-I`-x+Ml`mrT>@6)Zm~&UrfCI z{^Z)d+2!r+O$lmW2aLZL6kImtLi3%ge|VHmxxK<`u>Lo#TbbU5xqFREbha7E$|;A| ztZz&S%x`(MHP+`UjFm&?oMU7EJ~mD@ZMLE=l0t@>u2hyPC)&sN ziRA8fR&+K{bRu|-Y?`-=N|E}OPG{3I#&^n!q|(GomP?l|Em+Jxjw~uTM3};~Y5IF@ zouNLtxd^;jycRZGvU$z-_sxX3`?GhKqFOSmy!v$Rx3SLk+X+vNLFGAT;_5GYpzN{I zLr!dcS*w6^cKnm=hw{)DWG=cNfUcQ>p zx$uSJft700?v1lfS5F?;5;x2}Jb015zqZW7t9m<^Dtdh^Q#(AkIIw=J^4HEqK8-Ca z3tuGeEN^e_KGuH9-1kKvOaG{h*cLu&f?~;U7lT5h4}ExcKE*G|1kro&i4!OM{m=mj=2!Hr{$0*U+tOO0gGCRRa;7E#e#w zz9S7H!f(uupuO9`uY5El)2BPIhsp~hyJ51Ypc(qrw;Y617>c||ln z$Kc&%k#O5(=x)81SFrx*(VJj7tA-p42nYpoZ{BhI_;E5=)lbXnZkk$bMlRp*VwruC zqCIk5pa&^UNGOFJg_F~mrt^i;A2KsDv-Wqb3!Wo+I;ib&$9}%wRsCMmd*8kTwHnU3 zc083^y0Ug{{{G0g+90vGo#8d0*qKD9xDfa}XB)4|9;>Z~jvO>#zz$|SOERs$41BXO zvZcxU^2XIRQF~9;XypE=ADtU;XM5(edbdv78rRkaGn2BLj_#-NgYVcLa`bO0kD?|A zW?GJv{vEL?can>Y-`;u5z+$!id|Jv4G@yM)nLTZZa^Ga3`Gbk}^x)hDZ)I!)dTe!k z660U!T2rC5J^2O%>C(5#&!w~S@RO&TReGZy3ET0ePV0sd@$u9`*&y&|LCfMHPo@J$ zS{QjEJ2>KW)OtwOIxANlEiEY#9P@(*zfRaNQ|7S!-4fMom6J$)3zP$4$Y2GR?IOV$(<3QBS2?dy$OK=}bwLiz5oxlvEL&B^IK*FCdSefRi2t6Bz?QY#$tI&c%s zJX!ez@rfa&t=NF9e{rN~7Tjd8`h*Fl#-y_9NeFQm>8le?r2r@3KZTLJeav9pRH49U zT<1g@6=%q?W5f3Th}lBO5Q=g@S?A94{T{l`pHKVbGo~fSNR@T z3Ln|YpW|_Lh(Y1OFW;g3Pk9&GLb09_R=*J$OKd~!@{;Dq`Etvs;+5jsJ)O$T*usSi z`Gqk*^!=7&Pt^d9W{EA0zjwet?z!Z4H$EPLDczADWo5@Sy&LIEF!9SR;nRedBGn)x z%`CxUHO;%i!ZV?g)0chzsqQj8yz_5Z6*av$U!0ul(bOzkZ>%iUmNd}u$q_<|mG0Rj zVBtJyu6yy~MKO&<69~l-`c+Y;8jL&OI(zv&N;r5PL@zFwsUQ?x6zL+Jn3$6zWV{gf zA>(b2Qq~7nR8$Q5`RAWmRzrBSZg<-6zrII7(~C%_ucof=6;i$ojVpoJF!0FE&SuV? 
zA4i3AKIZfVbKR;^+sRV=o&E|flS-v5VM|gmduu2!@f@kDscCZL@^mWS6Un|OPnH)I z)qsayvN6IjF6;8ZdUKZroACDncoglMmiGG3q;|uo=v!+)@BA-=q}m(JC&qyjH{N{m zX07?Mq}G=(HX5=(YuT&7K#xLmLhWUe0{fqHCd2Cg~~&_U@p0B_g~*YH!eI!d(2SRpf}Aw zy&0UQ!BOC_%pIntHuGefrOgbB<}AMuet7XoV&>0COZxu&^J!j1e38$~m4D7{T1MvN z9hK8%5cKbGT|`qHh?s&s@+Wsz8A7*l{sIsGi@EvsM^mVJS1dsE?q1HC{a|1|LyV6Guy1 z(N;TGfBmr{=jS|8li_p@!A-ktv0q{RPm-$P1ekX#16%cb{ zF-nk{BjcWBr)WMR`U|Mxc2U4GdOo3$XbVIGP)sDFlX;it-Z@_9zfNCPlNLnZ=oKEh zZ=;mHjB2|Hb4ODSGBI_x@QCh|<}W4;4hr0?HnVozi>oWgxb}gU5jJ+`E2?SkR%>V0{y*PE-*o#p$8T2E zKGPlYA=S8l62{`P?>x8FB{dRH| zy{0s*qpUm;QTYCSEooejfrY9Di?@d>razi2QT^q=Gf&Q#kV(#pD-_5>UFy3_6pW0G zjlr#niSy4_TiV!+VQV<-Qnk4%Y*aXztx=I*zTb6lk&E>CkY&- zre?!KqJC?2ZNVylYQ5I!csX5Q%u6-uqKZU9P}3q zS5ml%RSfJsg+WbE=Z4Z)?=B-VclK{~tCu|<)Y9+9(9*KeZ3l4vj;5xj zyq(p?E~CXdqUX=!RE7>sRGiJg9Vhbya{b=@jnn^S@YT0GMq75pF^Zk_Jg%lBvzS7&0{=0(s^ZhFW*+fEd@l@{=`F$f zax>AQSVR{Mn`Uv3hj)o#Huc zcnWTOO)F3E!Db6QUYW}1ZHldVf&TuA+4WzHvuTSOL6fLS{C@b~$G|S%@$tBq>#Bn0 z@(AWHmdb%)kE2Hsi+OwoEhu6rg&BAx5_(EXN@TmVA0(a{Lppmp*ttS@35ly*hScC& zXwask3Eo4hm*K-e=0`NAymysMzT)HjeH}-)`_D`NBF%oQN8QD=bZ@I?HctI+YoA!x z7JOF?A@S0mq#3f{e2XyNm~b+gF|Mw~+Acp3m*^M|X>nzp$nEQh}VtOIQ>mNOSd}+8YL4_q4ZvZ7o!$y7H(Lv(*u=lqo;~ajh&zL0!vI*I{+_&gUd=wcRU{G^c;MQ zmC^dzd|nu$|Fvt@)Gb;+QT>0NxO*-8o#Jfw66sa@^zEza+7h}B$+Bj9mDE@-l=M~tPBQ8M$GBIW{*wla#RCA7`wAe)*^d?y_cM9K><{>+koRQSwZV+&+gr{Ig=9-aQ&E-_Im%3BUg|O zD6|u{+Ura6g|^J&+fE?Dke1L8z%4JjmRYU8AKtYbS2hi}wjq&xQGl}G|m5t5JUw>_)+-Ih# z0maSpvN$oXXwe{yQIHPz{^l_m6dh5mB$c1gv9+lr9tVGH+t#holoCa*E*DLfy?a9h z5`_rO9{x*2LnQ}5ZDb^;>0r}l8J9IX1uQeHVyHAAWY~nDneK~g^_c$e*}uQp#9YMd z-Q`ZL7(V9BL$^dJvxmdSG~35i&Y+Aq5urSDH{WIp#3{@}o#mKgtvtb@`f%U@;#WHs zQ{yh&MDcJzeaj}Pb}LAuYXMgC;Kgj7b+XAS{|Pe0BWsSfb`ohL&I{5ChXU3 z=xgsO*Deyt_}=~d-^R#P{PKE_4U8UbEj+AEB!u!d-<|(Wv0pz{6juS;Fo^6?e42jc zTTi*_9N&3UhCjYg^^>%uM2-!=w}yHtoF>Un%2_>vv$9$mMNvjWW1AxX1%`XuR3{_$uC}+k-QjkDRms8vLoH>8Lu@iND zGh0CfV1~f&Jr>!Z(-BR4fSuHMmcbDiPe&p@Tk1k0vU61t@&Ghd-hy2 zbo>OX$0;e7XqP&@*)o%&NrT?l%p!4Oxd}&sZeoMVqu$XSf#C+lZdTK(%>+^rqMZj!1zd 
zf#homU!~?cr_O6~RiY@#OlOGeoSB@|oG>XucK+?y{dIM9{QtQK<%HRiPlbCBmI_F9 zn1H{|ocfR_62IQ!{nS(i#Y{}vQi4DkZ1PjnwD8?6k2$cdq=J8S&XbLfd)4&~+mIb! zFsHpHsQOlRV-SA!j2U~%=Ju;!SqPUP$8!Jz&}thtcI>spxMrLl8sxY+en)+p(%#VN_kK7R8v2cF%^ZG2 zw%zn^{d)QiX~R56!>?JoH&!oq-@wD^&cm4>VQCYJB$7@NIWn4C8r^|gBL=IBZqK{7 zgk?qXcZuYURM|L;3|ZNL`vq;V{Tr^_vBt!VYMd?#9PZ}LpW$RkzTUyXm#u$WBigu^ zPvjpErXFyn`t?#@#Mw9E^$Xtv@W11)#>(Os(FsZRJH&Z0TJGq0t1PHbeUdMifH9?O zll47NCGXZhuFnVsVm#nj6<}^!+uLgSaP@wLt0oTVSR;L*?h8UCS0=fTJsEEb)+)Q8 z=Z?lQ*R>yH&GO4li;Rp8_~&JL&KC90=CG!KlPB*18=+_xj23;E^)GsUkA9br=l|tU zoik=*qG+|ZlUx~^9r%rmQgOTR2wQq+^IpuJjIj~Q^A;X6RNlzqMXgWjsIPBxbF=ow z*V~w_x!+RHu-a+xH(g8dOGt-cX$CL4^io9thWPK&MUTkOr@l?)^2WEbas5gO{rka# zvk^S-%(MJd{vii+VEkX}riQfD$Zw63ladULjs5>P4GOs5qM+Jn&kMXd|Ic3-JfImE zMq%i=J?WbPLUo87ACH@AlP2wew$i5-;VELg^W09c-Urb$GedJ_nrRXWxyQp~i@lTD zsTqg|bgN1As@OuMg9wi&$I0*|dJeBzyG*DozOy&Av*p_`i^PqvgFWaX$KM)iKgsv+ zPszx0+Q>i+!J?FA3+k)qkVe<8q^72Fy)Y(Yz_zcu{c^H6?la|xOma3w3x{&DIC^XH z#7UX~FdYoKEKR$ASD^Rwfc&yPbrq)wk*tTg8dGHivrkj^szCbt-*+_w)WkM>n|m7%?Z z^$E+l-JP9p!y=%WEsVd$6h7$_?zYShrw3cI2+FZh<&h&-k8XW)>GmBFCfAhbE*sr) zCA!>U>sFIURSLS+$CTStTkJJ3sVyp|4jDXnHKyhB^lF3<7_Hb6sZAH^z7y?MlR4xo(H@2$j7irU`5s-fHk)Ar3+2MP@w5raD(a&jl z9ncgi4+f9_Rr)hq=?hx&Mt7bme3ya2Ax?QZU@HDJ4=+g9=}OG+mokO+qIcfLv1iYp zZ>+0BoMA(&obO3wTC?V_vITD(Qq&P$i1={gCQuxrxG(ZC1YQ@0`%P#?rk{R*sk+Q- z<{q&}V2dd9vO0bLUc0Mv=EdF09tbhqm6*Az`=h<{gaK~wvPfJE*d%dD;6Gs(lznyOPOwk@A%76v|yDhZDv z9^J#(_OdE%BCBWv2eUesWr`Knj64V-#bWvI@C@GOPK7%GeK$CMb0;WMWN$)I#Wokb zviPfP(D~}xcQ7BcIfi2TX949`UAhGm_q43+KsJS2Am%IYQ zLjgEA-CwEg4c)i0v}C#5xWR*`hTKxV+74(Uj+eQfo9}lDRU>Zt#kl4f?wK+?8Oh+8 z^MI2PF`Hv(YVFo{H{$M0|Ml0QDk^=xMZRGosbiu{0!tIrqx;glRj)c_gAq!L=rCfb zF0OZslMj47-y|U5(UHf~$plo6qYq5(*ss2uJ84afEyAxbOuNuzN#B1Z{blEGxhC45 z5Qzi(cMS@vvCf}Y=_r|+w7kIi(#6e%jy0S9{?^|GUfwI+tV@#keCq6krn*8eGDRFD zBadu_^Gji9&TuSENWZ|qe!7bNjmwNgL#LuLz4mYEDkJ0O95$eg88eb+akWfBz~T0hCmjwc={od%#SDi;<-+Gp zf2`(O(XExo8=K*>f@%hvVk#fWJJ{u5wlAy)w%4`?qm;Aa>dARowA` 
zZbNIS`5#Y1D>@-J$$qJL9-9Y4gFc5O!X5&lPoHm2qn9js3B(d({SnGWXnrL!NO^W~ zcJ}NECC?7m<&tg4IGeQAOOGdc?Y@n7dl#K5H&`KIM<=j`V3UT#t++?g%=zZou0+;8 z-M!9I1fCZAzdHE*>v)7r>;dlS#p&?uI`pwQDZKPS9O}0wXu4PkTX9(H@)K!%h!Gv_ zF(@5xxBNiK>s>+C~lQ!kV*j2QrMcLZ4jnZ;NZt1V_hYJA0A9iAU5GFt`#4ugT z|G_BgmwtnFJzipD`nMWsuaN27}uLAXe;pD0(97@n3eTVUpP5Ap=gnKB1)G|54T72UI*p-nQGWY5qcS()$L6hP-ffqwV_bC*X zi;;!)NN9@=b(8=;1C@3m(ug5ENvu;HeUSCcvHjxE1Fk~{tTpB#O>SUV8IzZP_P;_6 zI2@Tj-AD6pHzDUOhgOGObZ=&%e5NTN0%hK#Jna7u@84%?WIZfEb@71CZ59`*SRFSP z9&x}50v&!9GDv>=&|oGv>LuCjk$qyvkim0V-U7f1Z`%*zFiF~A_#f=Cz~KwYL-4N5 zve6kqLdmY3p-ne21^O0VU~+Pn71rYvKGx4v+VcK9PDCi@1~6rzi_R#WQbAB8l?8q% z)*;FpZ)uWgh&BPs)CzYUSIT;F4p&hq_N11P*6**c0Egg%h`F&h)4+c0vAbz3PlQW88_^svYo=7UEIh_++lnj-MGBUJ^6aKlf~rU|@) z8PE!0hu?VNH(3W|m(k~dje=?jMGPBfIPOJhPfB#5*T*x&xPsE!NOo;^CAp zi86-5eO1oNnx-hI@HsD^V~wG~ZuS{eov{(*P-m?|Cfl{3lYHg<_L_!6{e9dwub_s6 zNv{nU&IjkubNqO%VZZtYW|QdVp*KlML~l6Sk0$h(n}4jc=Z;>sSypRl!t%QLBd_WuVngEe^-f6~ zCduvSd$uvg3MuaZF$^=SWc-_%BZzEbI%vTF-PQNjl!0mcGQLx8^kGB)tTl?x3#GN< zEFfn;jaKj`u>^Z*@vq}97^l6?b~ZIkn&s-cB67hSkT@w_fJ%1A4z9j63$~^yx zvFwAZbDvb_)Jplim(p-O{fo8!zvmZ-YGtcVqOVFcm^IsERoC|YT|3L}&|wVk%57IfWgz>li6^t3$Pnuj0{c6r9OH^Z`09butKdyiVI zHLP)qh^XMEyRYB><}6wfaT4+-Z=5=#*7;ts&irzba8#t(dQQcdN=sY!zh65iOJZlfa zS1P_lj#KYxT_{dTTQGVfyM#3A_cdJbtUKrxmfHC}ZjMqSsf*aK&4aV!&uLXi!+564+O}n3E``A=;Ee31x?6QK= z(w3*oH5jaQ$cju3TyhC012jUY2LeF|hxmf33eJ_Dl4=Z^64POV<{)l`_x257Q+U-K z+!JN|{`70_o?!vqT%5D#+|_KF!lSOmO+gb3p0bbjm>9mUX0$^oHg?PypuyE5(Z+y< z|HXU~+PN6S*y?K<=Oi1j8*S*)$;qU?y1q>AK|`h6O-q{i>$cy0oZ6_S+g_K6NU+(G zP`5@c+O;EIdp_gCyN9o*Dp*ieH{DTPerHGF?gv)^z`c)bm(gG##YsteS5#=d7_Yc3 zEp2*HQIpic*JoM)QMz_jw{MMcF}A3;M)Zy6^ZjjAV`5w?&P=z#^s=GQXNnk#**LYm zdv-jF&q}@>m9DXSx5%1x6+Q1`mpJbj&afmIJlS9zPnDmfMYT2d72F-;kShqFV6}EIXTDe;wL)V)^Q!Q-PK<#Q3c>>8mjtbJeo^4tFmCMAHcCLxXQ> z8AA)-rWxYyG}-B#m5+s*G|{ak4=@eJKbXXzthKkZ1AK(m<=h9`BZ3;7R6zDYpBHzR zzFUT~6VKvux8ky*4?cl$Dm&W*MwOxgak)Fg-W0z-ekG|wO4I)NN_G&+&{9NetR10l zJ8~;DQ5+Mt%sjV{hfEkPYG97@`pUVIdr3GLv#d!@l2@gkp5DYslLSrY*bEOBo#sbI 
zJ&}fdH@0o)S#Q@+1>{}Z4%~(tA$%*q&u0dX5(%4!Qu`B&zno%?Ff(UD6%VM@amo>Fp{q3w|bny^=#Y`9^QQ2vs((w zV_R7LVq?)|*{T6Dyt+4)rLj#FK|00`_R5@se!bSbq_LwxfE*pB3zSYvh(CS8Z*+YqQ_sSWazSwXv5G`f%>i95gGKgN{2pp$)>TAS* zboRt_L;!VHS5cvwDtm^;K#qx02ZM)7`o<*GSMF`^Q%!YStqTrx)@NFCP<@oHg9EIE z%Oyv|^pJ|4+K({U)6psl-nW=nQpm*rts|n;orB?82`V_kLn`7mtR>YNd_3A~3io=d zPB9&(eHffPW1tR4=rGun&|>BUx6GLmg$)Coc@NO~oJzQ$Og#Z99##>Ly?W7r2pt|5 z#5p6z)!@Z^n9X3#jS(1MDOOs2lIGn~wk0kMSFi4-dvTET*k&=7*A}x|UcLu^rz&`F zDJiqK?l)YIiFoK!2-WN>B;0x2pB)ZuG0WNgU=4i=bHy(iKkyNGdt<=MM8S8)7xq7iw9_7{>?qKU_kD+H)DA!|y=`qS z*<6ixC{;}$W9@5$p-OOQs4B)OW4dYZTy2KWiF|9ct$o%I_P1FGh{mN(G`IjVVb#3E zSy{Z@w#w|e*_z29YMQEZN z({~NV)CfDKS)2^`spr_)_A#Die47fK_|0+h*3GW+={Bl|l2W+Aek01_{(sxuK4_3>bVr}BKRgTtR*F8}y8h^U} zE$qXTB0hqF8W1T12S+=wm-V3yxC)Su5P8s3f+_9C7$mvOJ93=1gBq}2S@YnMd8T}9 z@p!z&C~GIeg+{on%!|PU3&pOoDoj(XT?bC;m^?zcX7iL#NZ@w6u8G1Rj^OZP^12vR zi)Wo1huVNgmg_uKFk8Cw_cy_M#f?g>3UkC5p+w&%f$>4UE@XI&66wc;gFdY0nHwDy ztfOUvwkF2S%bNAaQo?9G`uZaZN{g^45Jq5URwTU&O|cE;3Ir_FmWYBVBxZLxia$d_So)G{{5=ui(!O(|FWhd_ExC0c|rRyzIWeps{;Hjv_puWe*PCATWqw0lX&Lr z$$#BZp>l|ua5|-!et_iqZN>6xslYjmW-u33i}*-T=aRSg1pW(xfAC{y8wb1rZ#?32Y+2VbbS*!+9Bo zot<4U)LAQOp@*`-5+Ml_M5_F>ERKWbE zFZenT`KtchAo)DYGT>ii;0ibB>><^yM zY4W~+(m;fquKL%PQLOuGX^3AGBMLqy1q?t7`1gYHdfj53O%Mf*Bz&d@DwcO^VJ|^p z9kRjvQh%}I+8O|K3O43Iy$wP9M4wMM+@D!(5My3iNgD(*3566ig)AF7Pkc27Zs!-< zV2_Yn?t-pgO+GN;gg|xgs+yBu{q8SU@nYTuqMAcI9|s8C0yshVpAylJFzSgX?T(5@!4Av;y~Gymw0!0G$%edP87loYHKG*TSY?7? 
z&6%(N;k=%hAUC+EA%n^htQ2%&7(y)wU$Gtv4L?6Wwe7!iBSXG=wT7*ysZQg|s!W|W z?LA0N9#HTqvpo_Tg%lW(U;P~~ZR;^#zn+PbrWML+Fb$jT+O1Pkw9AsmlPy+9U=f)S zy|U0@5K8|0(RczXXSws7iID5SkO3VJnNEQ5hOlWXuI^SOo?8rc&tIt=^uK(=7)-z- zVs?Tfgrg3wl!X#!uZ{D@Ne6Pl0?x4$CuXY3tul`L`nSH(6Z0F}8HW>&nQ>!c$|ijK zZ5qz8nq8Q-0wIv=6+dn<4NrP;?sr(EX!8DF8`Qn&xKfBi3RifC5M`!v3)G~G5QzA5 znqC&~>o2R_`H}G5&br#mYlZoRgtLjQP9p7^U8Mg1@}np9;BoBf@3)2<52`Dqb(sv@ zQ)spY^%x}uuybV(JFeh=`ku!GXV9VYowC_PhXp|FO?S7>!dgTxa0mU(q%;ASf917Y z&2G_H9n4S!aE_)BoR9U`i@DXAZ+)673@bL#DnqX;VR)=CLVFStdVNk{TsE5#%;^A+ zV?00IdBR@@(qVMzH5XUY6rm!7zac$8f5zY`z^=mIeV3oy8?5^E=f>+O&M|=pNTg`l zhQV3H_yu0LTZMT{F*r*L``X%Ro9cc2S9GII#m?5t<2`5i`5h^fS3utmg>xb7(`9P6 z%7byClDifV<{Db12$_|E&Gc7GFq;wH`PT*b;qfH+9o#6VoInSQsr}u~PT+#@%AvhH zE=ueROkdvB)cYS+(Jws$au0nV}bHW)lCLth-L8lBV8Yy`zs;ZOH@480x~`XEFIS}$9g+od(on|Q1;?U&)Nec ze~g)+(fRuKUR9%nnF1`)7A(LzN?}{5WaGYIb^s^z-Oqm8MRurMXEDCM)YokqM>q7; zd1J`w?d1g+YIT+-@tj9<3Nu?kmy5pchJ#~;Z)iJ}a4=A0^dhernjA z?Z3N-2;DyTr6a?*oC2RbUOC3u4Gj&QJ?7Yiu%4h|Ed5D#9kLsmT@$`n-)j666NYon zb=JO?^ZWPHFT$Nu~uCZW2+HL!p4qFv&Nbj{n zx&1enk^je4Ub>wY>K#*_i|HY-Y&d7`#ur8DP|WXvyB>0nmwL9VJ@+*gGj+o}z*Z5{ z>i=&^e>A&UWA#2|cbRjs(r|Lab_JdesM0~uc?&O@-7eq0npPS3RhN0+U4cAj20!UG zaf?MTKoLev)z`KUBG?%%;}t^@6;d#Z|7Ak*jZoUjFYM7TV!LUIP_Ny`2B0l?yP7Vk zjT)TPcL8P*Nog5V`GxA3Fn-lfAIn#Yk99z=5AK}+4#$d~6h0d}4w)hS8mk~9bNRLa zb>s7|UT3ozY}7ov=o8~jMLIbQPG#lRKA4thTMQT?qR&){l(%1Zm&Wot$6Q#te>Ze6 z#0Cf0v?ffo=+>63{@bj7JN5iOo-uBo@Gc9)jyDACb;NtZa*La{(dO$qBv2=?NJ9Oc z@YuuRDbJZ5;?pY_9ycCQ?wKTjIn(Ld8di4*DZRgLQz7rX{|NSIOMoM=#5s~z#FD`@ z3UFv?=@vxZoVIqS9{C^ccjx72&bfC*3C8y9p>M?fSlk)m|`cl#<=l+ST>g0G3D|FOiW2!aBP6K zhI=E+FjFd(V=T+lU{}5Icd!`4*Vp))gU**XrXvCmJ~ssQ0x=aqa0wX8CH=X6k-4{T z-^R*k`r@fdNIxM6vylJ#w>Fjuuoj&@eY&Ki)eBKNlq7DiwLF3V!GrpnN80~q=haxz4E7wr(mvud(#eY-7C@7;Blm$e zBWkZdkL46foU#6&Rs6(OnBJYkENAACIeFhig{1jn_6K88$OplOh3QmzjwaoNK}fj7 zMiEClng*p%@WaE+b=BuXWNa{V+}!>Sdzm`o1J4P7C>^s!_%EJ1b4EoZ>a6oAxFgi0 z*Wo#=h5-PkSC2&xjD2VkBfnQX zWG{w&P)}qj0X*DRTDH!0n)h#M){G%Yr{G(Fyi2rP6A$dUlzFNMbB6HC 
z0tXBD>Cr0b;HQX+MuMi5QrV(`p#(}cv5(U*A@3er z<-k)5;h>yN$ft1B#%_pz7}8yxmcn!@t;i$F^VnF>yD&!r?;g*OoGOkD2%hpFzg7oz zX&6}x_Uv_ZYt}VVPa_-~r1VHN6X7yeZLKG`oR}amw0z;f3;wXOYH_;#x={gX{4)1z z^=w3KH(xc|X#gdP`~Ll6upZW$&RlJe5hQWgDxuWn>bpkc^ONU=_Vo0KFWPzdgD^~z zgm!-OxwFnDp0n<(#5~nY^tWH= z$Dur0&eMQh3~olU>C})P76(Y_SgAYUNCOl^&B9y;|0d0RwNh|(f5KxfU`l)DGxI&f3NL!FLj5}F{M1IZPydj~r^#0k~{ zM=x@Eg+>|e1Dy9OhMg68=DRTeguyW=Z%Vaw<9q5bYe4`77NeHS^BbrIQcP82qKO5Y zx&!7I7>E49L{)8*@PPmHE4V$ho+#CqZ6^`za7ifXU!}B2df~!_^rK=^F2m_a0I2;r z_)Ny|>3a9}ZIEs`hF<7~^571Zb32xsyCZ+!ONCN{>twaAhNs|n+ii~Gm2)&anFK~Y$2otv$&aV!;&!zO*l z)is@7%2#BFS6p%NH9LD*jWcNi+#9QdZ{)lO-XrSVbz1LZ6(#Q%GyDdXG&N22qu8J` zAOj4Ekg$TglDJ7xP0bxZ9F$@vN#z7X6?9?%`rz*a(0`*Hn_e-_@Y0U6eTZwG!b^D)K5<#$&;;*M4u^Zn!~s@HMJf3J6VlA=mrrxTCK7Im3ITpN~~S z=ZPbVXd5wZgCLP}^4PItJ8$k&?!;iXYn3JuUx(<8dQH|HM{A2L8T=e*o)K=KT$cfw z{ua7D@)ss9ygBCUW>p_uQI+Kxr*WI<<0szg)mK(PHatK`9IR$Iz7W_R&M^Dm-d2S% zeM81cOc(pxs@u;l?@Gq4hvN~j*NnGnU)`E*`o*}>con~sa3;YzRI9b9y!gve!Z9KJ z@{P>7UpKBA^$yJqVN$5l!1AdV4s_&c|rh!+4Dz~SNEoba!N4#|7-A1rW>OkuHl<$ z%cszbxp%F-o5SG`Itc9pj>1U6mYg*1ztBX`il9(%eEVXdtnBb6L(k=I_NOb)R&A>d zRTxG;&U<`mY@k5hmU$a0O3|r3kqhSUvu_XNaudG)UKx9+ln!lu7vn&`)S?%Vcjyj% z=)d*;Ks$wY>H0R;117ca_}#S7g8R{Gp)xWeiHXWe_;F{$rqxy^i4J%B$x3=%jCglI zI%6AcP_HZ0Af98q{1B~YHlz6m`UC$`ih;XJT1AN&8O^Tx(*#&glX0P-@Z2nOti?c$ z#<~5+Fi#{7qxMy=LS!SO%yCJg{~QP5=k3i%*__!pE3@eNi_SbLzn=1ku^vziAB9e# zFNUJs$<8(hZP$*e0Va7lsC2R;>^rD=B@eZyD-;d9(!m+sQJV?rPT|4+F7~B+{ej{P zNTJV7TMCj#Z1f#;>6xPKWtEXUruD{KulDqZ9IvM94srDE z-`lGzkLRoK(M@M=*<#_wIg$VT^uy?|u;#kl4~h(W{-%M}_~|1o#HCr;CX!3vf(m-? 
zVE@ptKxC@T0rsyB9w%PDT+Gt`2W2ff7W|=;3M`sGE+&(*(o}}{PL)ryZFWf*?$Y2Z zQ3K(3MaS8INB9RMi4yQ1l`}+o%B>|!{t4T%_CLgui5Xtl&|G4<{nFj)T8pjMx5y<) z;{U$9**+gW1nHifLndH*9QWN?cu*}WZ0)!T=j#2XN*cA-wpTv$rH|>#zf#;=YvK`C zo+p)6y*;&&22Q%T$#DyXJPvJdb4l&8mpMk)cOR$nmISwghm0h{$b;(hq^b2z@tkYh z3+N7y3|(?f_wV;Nh>PvM0jk_D>zVc7TfnXoOeSe`ono5QXLpY`-Mu$F^6`Z~P0gOI zLXC^8y}|L5TD%XGRRIdRit~$~bB535FU694DQ$8pwf$T0h_*{}+pld8{q<<;(t8)A z{g}n;)(z3!+kZpBDsCtW)>)Vw2JS(ekv;2ozCPDuaozAxzYZp%^rx5@MNOGJ+3L5Q zN0O2pu))e%$NvjO*Bs9$gBH#QFtQEemgx;PP(TxY6Xnn~*jJZVG(1$O?$YS~>YYK1 zdbeM{WhKZtzk|46KFd1Ksn)i5<3Wt;HRq3!!0Pck|oG5kd3W zy!kN%vt)As^1yO}t}w+;^k2hhh2CDZ2W7&&Hrwn)7>^TX4(9qOH}#_*b6t1|Wo?Ke$$`%s5r6L{%fGb zQa;&1Lv?*|_k%OFRz98XLi@fUQ8{Q|P$fS^LFW+qv&_SEl)Z9{Z}PvbwQvhWCyI48 zN@M@?4kz+4ErC2&Hfsz~Lc-i37Y0~}Z^>Eki0)H1&Vu!i`@3@@8qBP2X`ii?-@F*Z zlteGz0~rRiFjYP4S*OpOf#LoHxVyAI7)2BXA-*`4!Y)O4_b?3M5--jt0*XSyh zTF#s{4N9I%7+#cYSE{`gYpSGp{~r{AO>^K`G+9XbHbD2Er%uhz%Brf=JzY{(rnT?I zZ9KI;XgX3GT!r>sMbr`#*+^0^!Ey(iWqy8%una44LoxUf!!*JBQe&&ri6p&0x9o1UP*JoCvL z2pOCjQCIxos=q0Qvj+bm#5hPeh6$}(Wt}EO-o)nzpdlE8aC!%|)+%b7&d#5+?*3_& zPTuPF_oE`Bj}LE56v!mK8m{AP%n4%>{vvoeg#Dh_{)w_C%f03I<@^2)7JuSBSUu+2 zqp$biI>DQ{z8^XcQ>gfoljph@X)WV?=?$j$kW~yzcomo|5!W)*g@7ce8&i7N!5cIg z%PvhdRs6$m{!`bq{0!d(orVGx_;KkKYWb((?g53E1uS@;2mr~cE> zzP%+6Uscm|V0?%-zBUfaeps3j00wYzaCac&KF>dU72S!s(N(iP3OZSA2I^f=RD16Q zp_~k%j|>8G)j$IM+wv>t^J40qs6Cj+I>41a+Sv+Jg;y8SIz1I7AH$O0M_-YD1LX!c@j2g}=<|@p>Vl*nj2*Xxf4B<6Q^lU|HOX zLCA2hwVg-UiN!^;edAaQ<^D)*7qIxm5a=)X z!e{Oud3J2EH6S(33x+u2YECy$pkwBbnVCRD^IUyb+krP zw>}{}aUzC}kAZAA-wfd-*l&!sQ2A@Ji(j#C)rc7Ck5gnFO%c}`Gxy7H9~zVhK*%P} z26zX29UzBcPtzBE{>KD!ly_k8L2&zbRxtiYO2z?f$O#7mS%2kDj9fj1HPEAuSc{I7 zco?0D{PYtbv;YJ<+`wUo8A{mRw6LM?V*ieyf9fk2?8MFpJccKs2F5Cs_~XZqf6n=x z7*P8GZw;|lg6l-&n!gX=@it4P|8bbeGAw_CM2OytkpOq^%NT8V$peN1HQEAD5a1GI zeJS&cq`&y{t%5ZmO}tO+w0p`m05$c%)n)Q`-vxyml`4k_W`E)Ge~;mp)f0F~(ep}G z?*Wnu++g$OLCEJfKPE-a0e~5vvaZUcM_`!Q>BF~z-pW+7@G4rQ3D3|!hWVemeu~mr zV^+n90X4OYUD86hb0o13Dn;Lc-=gQoed+QmYGzHQMnY1y?1FAf;jLT5z&9!Gpc7nA 
z0Au6UIGGy#eSnOw@bA5bXFe}`l{m- zdEecHf|8_F!r1vsFP1eAhJkWwX3|!HAx3C|MG!5KXZMz4zCCDcls%zMfM60NRi-1K*B5q2LM5`#tGKkdV zf;t0ULd3BPf1`m0gh$TrLY~XB`g$8z{474d6Uq~(nj5xmc;sNxLxbnFtaVgmy+qdc zI7sTV>^9=(Myn;^Qeo-F4^tQytl{l}Mjp<84McN`v(qWJgWIaH7X><#Lg2U+sq-qc z8Vdl$jC}aWiLvsfAR<9*1o9>*Fq1;OI3Nmu*HGi$C~_tv#twsufK(H!OOP>W`SdwwQ_j}QK%-&H{L%e6 z7ZJrA^jEM2UfTblRf3T7@mE2-D0aqkV0?n;md?obi;}kb`09XB2$2TCE;cJzl;ikW z1T-EUJQ+s=%t*x-C8TY3@rc#5TZl_=W|lNg>)?}dYz^IfI$g|YP0`}qimOmZECB-- z@qWyEtyj)zN8|rPvt+rj7$$G1bD;LcSYz}tzAhPut5e%KZx$otwnDRvX@;BWU!c-Os zd4ZCq?iJXyD0?tUo1uV#*RuVu?&BiBS@$RJ0r(_g7W=Af-?NCqrzVd}SZBO4PA5=TA0q(g3wTUmI0V}i3AmMLIboKZd3*W} zJ=^l*^%J;>!3r)-S2z2YzrpIkx?S=#2O4s6<3@kjvsMixE+IZ3(C|>zv9zD1wBKxo zG!hpUvfP&bqx&i`nM)rXAz>7;u6lFnCgwnr2m435yX_UEf|M6%X0_me$1{V=(ubx! zMjm-#N~+>?Bd#mq3SfselGKLNTu}PfDd|7|EG5Ok4THJH1B~kQf%+yV!~L>kEI90{ z2nvFm8!^%O@ki1*V~zyBhPZEWtRx!5a4;R{8TJdn9U;Hq#UxVdAMc4d0Vf_NrPX9M zgP1=;#S@{}zrrMO#A3g#wH1V?EV$51=+NprVr;V6wyU+YwBVQaBP31O#;z;vhM)-! zP}+YJ|L}I2;wcthcx(2Qc(l8qz$c$HhMDvF^H81PQ03nB>1{*xYl?jec6m?=3Ov;8 z?1@z=76R&}VJNI1a>9cZ9Ap2dw>}>hRV9bsX%h3zKkva`kzXTcj2?VA5Wz{6B4cF? zt>9)+?L1F*l*mtB_0j(2k3MyTw>h_3Api8~Q|^XMT`Y|3o#q?DAOxaQ3yawOSy#0! 
zf@bce;{#B-e<9Hcl`ob|sFv|1Bcn2_|hnrhys!_QWs2KmHv4`+SihV{giZc-v z@$VpLHTT0NW*;hCr@pNJS}DMl0(o&^F0{R z;U#VtqRZVi{U*SQaYQEc-+tGmo;p4pj4{!sVS}WeM6TWDY|+D?f7Plw0by{?2Ud}3 zbqKE`Iy)!`$i@H9Dj%0`)g$6*r#yuVAGJrJsjV@gDg>v9|(I7Gm+`)2IwJ%$W zH16n|Uwlc6MleveprFlMY!=JhouG5&EteAlM@&pt1SYt5vj0LT4a4$fv^c|Od@g<~I(sGj5k3K+R zG3-dl$mw(E3^!~Db}&&(z;Z*X;mDW#kERmK?e0Oo5=L-cj=bC0k~VMk0HD<9-8zfj z$z7a~_#Oo#q8LYB{qL5TJbXLU7|=i8+DG#TBwL*t>FkJS7C>!vTH}93(?%Zli5puq zFb0RT65h?Y7chqNLPH9n><4W9*GvmXqXNkX|EnVz+3GRn5YC4gBi`F{Tf3O^F( zo56dsf>SBz1c1XJ^sRC>{YdWqZ$FAjIf2=KujOLyO%&##SOD0swNz8tPeO+@B~JjG5;U0T4P{f07`oE{G0rinC8hHZ_}xAr@r*dEH6T|3k>!L zNv)-e7aQqYLOFVYs3-CFsfTYIzw<8HtSwD04CG;H-~d&ZmruOUnepYHWetyM*pl_Z zofmc}i(!^i9`WPnABSJTo5hMEygB+9n5jHLETfwFsxRH;x@Op{M4^_1UllBy=QMus zfV=R>kNQ*xh9AXaO?BV2DWHBacP}BNm17MfhFQV7`i(!D)z3Y1prDdXVh~Ra909Pp ztXHl2V{PoT!Wf4jso?tppGCDhXE`;id)AB@UI^)RpPUrdOL3}MRdr(emI1#l%jgKs zg!mjoARljUWG_7cX??O|GZ)i|%BN4OqJ$LwOqm#k!N@aDJ|CJ@*PCw=Cr+Pl@mzN+ z+{Q+poJEl^v)0T99F+(91z{l!4-Eoidyg0iU$EWKYrN-R$Om^IF&&x!B`bi7cJOij0lDh5Zl|Ffs8Rhn)vNxB|7KXxF{* z>{NKw0l-?Xr?*9?V+nWZa}LLkXssP8I;vA^VJY4NV7L%64>CkUr@KG_g%u&v!}3Z~ z4|>_HI^&m0{bw2TtHAgYMH%@)O{;^9e@`nQ|nsp><0S&F8iDoQC za9&W;70|R`d8f%5yliVik{^xh;zxXt&z#xfU%qhZ(g7GGY^!To4^}BbQo+qeS>D@Y ztZ*_GDLlXeyzxad=q)qd*G{_vmtr_?7o)}{8IWK-qK;J_w{PZuHudB9={I1l$ZXP> zJ^RJe$0K0l(^2&KMKYZ_wYflJMKs2`CjjKUut zqnpL!b=9rU>LL%bI%<2qY5h+ju0N0sHD17G_+y5kcUQ`tY)08Z`#`C_XnXYFTK(NCwe1?61gd< zgt6z{DEl4v&tRsJ+})3abvqct5QK($SfN&c3l8e3E=_gB-tlmYt?o^{Igpf-*YA0B z=F}3^8>%WdclQ-kE)!ic&odi?Pj0>sNV-6Lo$GQtv6S};PWjp(MtV9@1E7L0EslWq z5e&C|xWT%_^;IEtTZ#&{B28zbR@H1EFM_=O?VzWEp+KDwbVUgzTee%P0jwpkgkcbg z>)Q^Ee}-{-^nUwyxG|+K-BOCiQUUzqbIp;KDiLSR$`iMDaG=!Zzk@e|aZGewjLu+3 zHdb7%7FhQLYICP7vF0!`vqLM{x(<{ ztq6GLrolEd=tCsAPIHeVPzktco4ut_XZ$hPTj_vy1O`YqL2e2x%)$Oa!zhN>lKcBB zoctNo!@bXbyCh)3qQoXJ&+o!RY(W6YluV33KE^pv2Nf>S)KPiG;uP!-krJxcdZMB* zF~TIut@}HZ>U48YRKZTa7y)%ah^Jyx*{hVKI5RrcE~SOU(XPO`-(TtAT{s0j5MAl<07yeEc7zdc&C=k#8zyg% 
zgeiZM*-fN6aUTQt5JO7x`PeiIIuEN_$zgM=dNz&(Z5~s$*uJQ@#vI?xNcfD*1mZ5P zqJ3m5+2Rnnxv*MLgL9)jqriYxdldW{yDbsU2F}hMShTjzHfXY%Ydk$YP5c*@5zFtckC`74d+@8?8`F^)Eg6MzM3ltxPNxzoqF@_9_Cfc=%-M!(3H;c>-kvl8AfBkWm8;d>!jGYT zmsW9T1LE}1w5H}KADu}bYS!f@ZqnB;6uei%VFHAwwQ*L4Q(bPl#IBTa55NZ~dqg22 z;dyzUxU)p%?-kRsddi9ec9`*{5_dU)&H;soYP_I%Jmz~XICOHq*&?Iy<4%<09|OPs z#8aV0G^8%TRA{1RP58Y_I5ftC#vYtxW{+|oKh#2uy7?_u9C#DIcswNm%`ht?(a+rA zqi1n`w+`>RC3p%;lOA-wrDfQeKL9qeV(R`Br@L)in8X+@EIII3S`e&I4+s*QaEjf7 zQ>Rb!@su_Th>0jh;3CQvUQv{lVM?D0+iqrF(kn z$YBf$vy)RBtQXqg%j&!nXAxjNRM2T6WAlan%P;huJ0XfVZY$;0^8l_xC5;&wTrW1m zLrlVl*isf7JXd|Bm5oz5XOahuGqA0>SZoBj9ymtYVq_Hl4yVHzJuqYo1xaMFpCfnf z&aQeozdP$1xD;?Ygh5Qzt5vp7phgEvd{68!&d)}yM?0FxDGB}%2tUQ+&WsZlNgnrL zDTwH3-vd=^lZ|YM@B1$)bcivvv|!@jvM#;|NmD@7JW%!IOuBu1)GYL48hRFE<40+Z zA)YZzfu_oD8bGIw2nsolD3ijL7YL0)k{O2qUl12TulA>8ka68cH|NE~kPm#&Um+`y z>*X=!!WxqvqwTI6@Ugj!=aoOy))!`YF5&~$ zt~#3zl&tF4jp4De^?>)lt77MuK|ujSEfMVqQ`|``Rp;1N@{5t(XSUB9G0X%DEwRHX zg&>@D;E+aaV9#A`udMk(50%)aO%m$PJFvHN%TR9!uv+`N8unVOPq`UFrUf_P_}QBA z$^v%jH_IONeH#n+-+hPwBx8=W`LOwjJ$H_u~CYU-&eT!&10OSDzt!4zR7BYdm*Xma-} z5lgAbld6BUK;5k_@;& z++~V|M5Z+&7N*o2y|_y=Qujwa&vi*vzQKiI%zK=6A;OWVuTj{{6Vx_v_w+}YyDm1q z=r=qBVAknQGu}miD5h?{aoRK7!^e*|+ZZ960N<5(jM}`xV0l69rR%SGWXPzdYz#EC ziKPFbj+1;u*vP2QE&_~16{1$*h8hl1OlfuXnt^v{UYlZ`y2m9+e9nWF8deJdaw)|lb%gu=f6RI!jqm+6U-2G2%b ze%*xu4SHlr=sL&YOqTx$?6%So{$-m{>I61qqwBhnrhRP%@<}|HcCC-wc_}CB6(dV5RLlZb{&m=inKzU z#tG@zrJY(kuB9&X?2?!^?W)M^NQtr9@9s~99Y9cfD`vLyy>dgD< zen6R+bsc4@71Tc%Oh7~R7JFgK@OG^y=ws3Tx$lphGjCoBRwq&apy7PG-e~5MJ^M

}(lXqh$8gd`oOUCPg}kG%{P2_qOv#WN-YGkGcTp0e8;cZ zDm;<+G80O%MIa6X%UtIuv(2kO{Ng=cJwS zJz!B^P<1$a^VH8F;o&)ut?5d7-yfKM8JCKTq*iZpKl`(c=V^sFue$@&f!-n0lM!?) z52dQ$Q6n+n2WE3c#Nua7l@EA?5Cp6w>b`d|PMk2QXRhdFk%xgX0a(6uM5u%fA=$sk z3uYbE?|J^*TGm2n1umEgy^#3_ACMjRhkplsXrgc3?*EDS zAcV)qb5N}5=3K18kQTYQu0b3?t7FyNts(^{m{9z+FlLcDYorMivY7Mftv;lj0>f<- zk+)i+_hKWpekC=#X>`t!FkeZjeCVD62uTtMHjS_}r6yMw`grC@Mv}~BBhiCm+(mup z-xevqMIDggMM&E_W38FLjAT*Tg&zazM<(|*Ym{$c_4akEE0tfR4LHl_su3 z%^h`l3zd}g%dO^ptUN)k!0%>1gC}lF3}DWSA{LW0?{C!C->#s1S|R`kv*t01xv@va!PkVh<-gCK>D=H>+DcRXkg z!IY>UxVZKS+Gd!FmdEQ1Bh6FAK43~)K6i@A4H)?Cz&0n_v0{|_XDnQ?G2=P_UvWj* zBH${R>nEebIMbYmcyg4e&z~)Mhob+Wk58(ds4TpWX6n}8mivfuUCI}Tgp@MpkOX>3 z$u9TD!BkH0nn2YaUEwS7K*?`3BGx+F9pjb2fH|e@g8x+)_ zmPQpx6rglZ)(RkYmXl|Knj*$8=Wwj8CRc4s*8_xz4<54oj zM=3Tk+%hx0dEgDmd8vJmGH$kEn+hE(mD4eXsr(M@BSI$LK|!6GvJuH%vZ8iZ(@GCS z4Z9ee9RSw0u_@gi$D0oTo9Z#T;8NoVvb$o*(m*to(;Yh;>~R>=4~dsHl9@oRoi+|? ztkiW!LOBB@O$^ZwJmFa6gI*uHjQvW~>qfz-SfYGj9=$Q6>|N>fQVQwDi)&6}L{_>X z=fz#zriE6wywLlK_A4UTJmy=QX(Supl)evH&)fj*nYsAF3^*~dg8Oe<*dZRYuz2p<5D0T9GM`QkNOy% zx)P$ICRw#{f~T00HZ@R+H`p}Z;{G1B#1W+iwo^rvL7wEY4Y9YI076Jbla?iWO+<7d zD(A`XMHX6-OkZ^2UF>XKrA66jn#r17=QEWAa3kuWN<|m>yU)^0Rhu4VuK+fM+4W{& z<$Hh|No=r<9SQ2g_9Q44fyE&#IA{3ZM-~afGW81WC?PS@=O0E&}ACp#!9ew0-DNuf@CiOeU@6fZztr> z)E}$x;;i5AojHsgmBVAfvziHI8=QAG_00h}#ZAZT(ft6!rSJRtpi8=gwihlUw(FEs zrA-mfL0~pkSDJG)7&KW!Syy1>y$Dh1TRF-y%a)l*s*u^h$wwf{14Kb1a0Ytkj?yQ= zT7{(-^j)vO+?Vi)-WpCm2IYGp>Z+Uh_^(zZBT*M(GK=nH|TYYF_b3*WybD)6E5Z#VD0TVZU`Rp^& z{YP-_V6Ul7H?JM|iLSuRD1wq?zhgqDUW*1@8VpK1=fsz&2w{eO^quaDfQM^~>W96( zx)vjubgA#aKFy?evQ%47>FwgbEyDhL_z(@*49CitijOzAFiV(2@wd*;f=tolK)^?X zfX6`89`|a0k{%F3ob6m(w^|e}|j6xXY0W_Mn2PAN?Hb$mRWpTeA~ZD>Jb2@wt1R z-$;=doL(f(Y#@*i;fPa|UF^6?+{B;SZm?L}t1!I;l3F!t;VmRID;7`$76lYtzg`a| zha%cBKIy?58c46DNu7hv3Z60$9OHBV-Zh8_FM>6rmM`3rphummF@+&4LQUOzz(^pcbvy8&`t64!uE;^JFz81;Rd^pesoQovTFs5O?$R*2JV2Gv` zk1B6?fd#r<@>u6|ydsQ;krJW}hwdP@I`AGA=q}j}2$yOh`_PoSy7cw4PoyEwhSAX_q3xRwSzYnL 
z5Y9wJQ0JC7W`Q3L#${&<2~Pro#JqDGEPvaFMvwx8{U*0sx2y5EU!=u#oNas@mBei2p-rU6gOVvFs)r@IO8OHDl}v5}zMn8@ z#?Ij}qp0bc(PWlR9R&-7E%yKz^+mQMr=l0|1gSHra}R(Ik?O_`;p2In+5N3b4p4G3 z8!BFhwwr>%sMDR}r~Umy$EKnsof-KUrl2`5ULj`jAnp)Ow1WLcC`!Y|PR!}xV5@?J z-zip~Bo>GkJm+(t<>HDJH*bT43cwvzu0FIK6$XZvuE5x?nPk%HT=JSH=-h)<5Mhv& zzO?n|nowaahW|o6!%4G<%v-Y)%H=YDV%H+-IJ9s(qJTE7H@0HtB7sv%+job0IZj25lM)rk0jmeiS6cc`Rb5DM z@XO|AIbAg*WMEzgdL0rnY9P1*V{`sPGm{q=n#Q5;oZn09y5K1+=(xo0{%v{^YWXQE-fzZ$0-R6f zlRs0#l9V6Z;Qsa+SI#5`2NWjkjA#8luwuVSr>Dd-+O7d0kX7jJzF`@LAu=HC`uz`Q z6*pI9S`Q2S)^(xc#zgTZ260FyEB$7@y?vsY?;W_8Wh%cKvn_4MI~<0-m_CAny2SBm z`0#5ZdJ|SfCMsK^OtJu3I1Hj|U0S!>qZK9zdebpXHmuRlh{wq|Q zn#xRh0 z6QT*b=VA;B+#hBvdVz^aIvfW(=nc4tS*uARK&c7e${t5pKlUwfHmQL5ptxy!SoWy0 zG8-BQA{fKC=RIb5vsN73rj)-EBp^KY7@Iu<(RpEH4;DVieio~a3}Uk)`m9I-+Ta-} z1PE`qapTs~Gcp^I&oR5*myFkOovCTd>`KD?52;>GOOhZF>RZ(rOG$Q`EFESi36NU$ zcd#qV%hS_J+HvkSo3%9-4&s7j+Ok9T`e5Ws*iV?ND(4yC8AR=41ctSD0CWOyZy~pZXWXDMhV{^ElxnX_FfR#(Ha$&<_lXBbflwp z^;E&Yeo(Pt;rGL0p82R-eR1xo|8y^59+lH!AmB+2Q{5FeHYv);+49(zLdfn0 zZ-a(nb)%O<_z>dl2=8U;)aF!QKSzp1X+?#)@|vqxuUb5xK()2Coe?>kd+NoQOc%|h z-=pgF^PlxbZVz8QqqpZlkpBw5?+(ldbEKG~eSYcwSHuoyvC6Ky#{fjYk8Ylpmc@nd z4`amsSCh6-7ZH7G7 zrNG!=nE%KRWOxZglG_WDh%xf_DPah_6;#*KU|8jiiBGd~6(R_{4=1xhLK=C=zf_4I zhvzXZn}Ruj<({H2_35HJpb^K#amoz>M)8qqwrKg^uLk8$A36A0A(=zgaO>KkH&jzw zL2w^Ji@xxhY_5h;y)`$Y_4=@&RR_h>Ta?AOyxxIGRy77TFQcUDf68e8`lex=GY4?$ z5-NRRDylx}6>Q_7qa?7yDgS`C{Wuz(oR+qKM~;9i9@8WK{emev+xQ z#G!A5%ZwQqV7T$#lA#JW#_%CtX#&>x;LyH=md{WlVJEGLQIhWn?;4RGpw}RKn9%!O z)l@~6ewM)JSn-@+UR&Ew>a;hBF$6KjW|d+J<4+ViF7{{& zB2pQmg#`;bN(6%>gZ$6QTI4;iKVW3?DGX?C*}iNagH4;N zR38x5o`@Ux5`as3A^gA(?EL1%E&PL~=4Olr)8c;7)8De?DIAxG+5wwHZ^1VgXW%T1 zOiF~vsdv&Ar%z9IVl^4SbO;WF-xZv6maueFr{aR&-i=+9oM%n$=@LTI4CCZVu%F=E z1x%Lg5cE`f!cQQp6m;g+c{z>QC(S+m3PBS{4KXp4L9PPH0sIHkeKy0fBC4kj0{Z}k zK2$cy8og$E0VjNHyq0_E)U_C^$DOqSe(T%TN61qXGXfd*BUufCtcA<%$i<#%avn!=> zX~PPCazFIT*et0Xvl-iqhMqfl)_eH$nqP3)mb%}(yr4VTyt`{zf1@0dowrjmJvJWc zS^6-)xx%b^Z*97r#dNjqYtpp=n?#ny^y*g6%xkpFOSG8c-d$MGx5AUPL7`BRnt4XA 
zcBnmY4>ppyoq|3v_L^HYymu82ooIJW_1M^z{$g8`i1(_D?SqrO7nN8Vbp_#kZHet) zEtb_;+!Nv(o|HId;|2TrEgvebc~%#x_Fu~%%vk1GvLWfvnW7x0@b(NwjaS5?AkN_| z-TznuFt56bc`(xu@f>p>nX>Q%QNAMj3BAdmA!GuioFf~~Jjs;0%4Zr{eT-$RxRYIbIwp)w-- zxv|R+O?H~kAoIu6?JI0wHV;33A**FIFui(p(~;34k7!A&xx`Z>vDw)qG!C&w2#rsJ zy=cgA2Hl0dtNz-(ZmYnxHG!_9JNkYu@O_xgI^u5jO2=e%Q<=GW>0Dk>g!Z1iUO{r@ z>`71V1#>qm)Hx@1YasNRyj3jUD&7kvbs+@aZo>X!4;?CPpjR%taQ^Y zDS8Cs3J2NzDR&JLTCCdKt5@Bx@w6`p9DnuZfoP~t`_SY6qZxZsmbgbSKXu9x|BX3r ze=nvg-0@;B@JqH{;F;jB7e?HD_(k@V`TLxT7uWQ;4>iWkS+VEuM8K4lnc2=tB1k7rcrQSN?fyej6wV{Nv5gTo=%*FLyk+f6bw7sqB=E z?925&k1J1S2OfJ6Y;jR#3p^{#H!#MY4p#h{L&n!`cy?B&P-8=tch_D0<0C!Vd++@S z23_CA{KXwTiJv-opSU&nYoo2(QMCmc`yAMJzSC8&Ei^3BS9*!B)m{)4Zv z5Ew0P5F3G5>1dWuOO7oMhxEnQ{fbHS0%H9wyTndw-qU+wy>8U;)7g!2xlgpKyB_rk zd_-Xq)?Jo|MgOX-zk~N-9NJI5pbxr7MzneG%^cQe;hKdRG0)PS`U=^TXFQ*kE5^7@ zs_vMs__8O&f;I1=y#C^m7Sd-eni9XYhr^BLZa`%UiEduXYaOd6JwLmsq${!dm~Q;= z)Ed=8lI5>`89uU2^^(fd-zod=+LqYjx6E^lnq|%{aV`>hTwp&L$YD*3ubO{aXEa~V zu0GY$-tT;0V=+zCQQ%dx@{qjK*P%|nJsb1)?}l8{u~HN`a#*H-Ix)!_yNi`~5A0tbO--pSdltJPQwQ7?vUTYWy3JhxgWO%Z_EdQt9(}W6n!o#0t*w)=W z3Vwbo?oWWpi$pWNwOTY0*R}i-TZ)gnbouhKhRa}dBGB5zBqUaK60x0bnus|w|AsOD z?N4&O#O5IsWcrkeQTXj^zQsBC|9J%y_9aU_!H)7ebbD{f38-@kU9kUloEfW6PQ+WK zz;y8KP(CFb)p*wPXHJavZ{PD{%0JDa{&|iKW$&!glW3Ru`z=G7^~TZ%A@+s|zGGU0b9nu@~ud#yl|gzp&q*TvI@; zc`fQ*tK9G=G0xvKjBZ>=qUw&;OlHh7i*2I~v}vlJU{|(jY6_}mlBZ2(Y~3KJnnBh$ z$!>1bT%24&dh<8(;NpV8F9gQ+UULWHcx|*GKagap;D!aH+$05^!ynu6m3XeaxMS-; z+RHn zau&YQtMjXYI67x#n>3P}6mP+MxmXDx zJ(Es3yGBpiPdHk;b)H4$o@f8thX9(cu&Ac1?*d0}-@YsH^guk7qO`w>j~q|axb;JqF6%W45^bM`>1+W>xY4>i|V}{lW6tBKj{lFd?DS=4j(Vwzxwx zZoP*qu*N*RKT%I6%G9joGsqVV@Y0zHruRsrVL3mtBc?E1 z`YsGuLs$rMJ)NJItqU3|ZjUz&&Mo^)ss*ujWUBy~=10&H1s_QP)_|+2;k!Eb3%iBJY4FnjZG)ra>Kf$;41?kF?^Hx%=tq!^l1B}JJxUd6HkQhi#Zn>r}-5kWP zGUD0j5?hVpE@018y+8n~b12FhW+{`Tn$J1~V@iGL?BYhf09IG{AVrKjN%dstuy9c{`T|8vyj8Pz=_otCL{Dh_@<~YM)Li z2rnmv1w7^oSI&zeCfnvs$;rvUG{a55Vmj<>UCa0~G)=n6m*7q|tiy!C4gHOHu%@DY z{>f-nvy~^M*?HE^Tg1o&GD;9GB~J)^6gVFq+GVOVdu*M}&tfI&Y(g3VrnQZ{so&mS 
zv&BzlOSfYGCd|yp=)r}Wcj95|*{74GnsaLZARAJPa_x`3I0ZE(Y~%{BUHesj=~Z*t z5BX8a1J-CR!3FUyw57s6vpt<2UY(i{nJSFoqKb4eOM3k9=#hE_z%gV9>%zmEkT}ao z_2!%#?n*t)hU?-T5HGPIH;o9R0>0-|Q|4=Q08@#KHU;pE%)DO13KpkvX-zI})1~dH zC(!O|fT9D~7K;|x2X|Djb#8DH4rECu0NGse=ITJTmfKFF!cCZdlrXYQsf89JDPNY z;?@J&TYQALmHwcH^I#DJ%wWW_>otLVWYc=qD@!xeIA%1Z&og6t2wx zBnKVA{9LsaD{l7>mfoIMz6WDN7BXlIWE(c`%?BEcCO9BLy zd)_`FP8_cqve_PxjDR+NQC_!+cxj0NVW)^Q_Pb8s7UC`%)EYHUm-s5h7)3=zq3|2N zpb>i<-VhRj2k$4Tj_!|o3V>S&ON#a>ShT%sE!ycY20 z(sxTZOTVGc=)LkiZ_*<$h&Yy_^|KP<-&;6N2|-D9WIFJ0^s~E~f{@@i1tU<#Ziosq z+i2%;4*1GcXEo%OTNc*}oo(rdIv#cCbJz55wCf`69oOI0bGhHgpwi#g>nNrv+!Jeo z396G~S%S-2a$2F2XDrBdeYV4-l$aeh8;|x}j*eE24nbps8ON8Cug`HZ75<}2{CQ0c znXlnTXBrq(SJ!ba0+UoU%|UlL->tz!ZzErhtdv?;MbS$5`9sD9 zw;iT~7ygn8F+)PX!&e`KJI{c1>`p;1=nN3%9ox6>MbGy{rine56 z%Q@)#SA(iQ-29fTJE~z@>Wm4bU|DuAEq6!&*1eJ5P=}@80^A1h^F0ZMG`nI`cDP12 zITuSnMBeBn;eNIU;tZ_yILmabfXGTXomP8l#KYhEw4Phsv;Uvid+ee zmO?D{a+a^45!61=rCJcc0op(o@>=PcdHn;c3I=iR3X?I$|5nCz++l#mo(_N$h)RO6 zNAbo>TKiIkQ@$W5gc+*&fhxoODjp?6dC_~%1^dY9fSDIIW@Z4xKmeAi)oo69dN1w9 zfc3+jf=uvn@X}iHJ>mk!qMv}>OrXPz-Ie3}x^LZF~-r7=K;dIfP+gS;{Qd+!*<(v^qB^>X^c$-~F2>a};vhEPA#A54#b`10We;wpt4 z`Km<8-zl}_Cr*4Se6AgX) zGZa43r0>%ceP=ho!q8)1x*Qov_;a4m4!xZu`CC@t$+L8ux2G_s@9KfL|8}+BbD7N> z9PBkaNSMM7SdY>-FocoKLb$jQ1W%C1ajS?VoetDb*s^$$@_qr4kAeqttlsJh5jDVn zO}rSFIwg@FJ1pnYyb7?xK4;vRK}gxeQFK5rE3IxjpiV;?cKgiPSJWBOokPR|4*)~@ z;ukWYhwKrqdf1kIb?N!_@15~~cGLHM(@vAuJsyuI_EO`8o-bu2em8rcbSKuAr6jMw zY9>?It{_>~FvWk_vKXT#_N>}@)1-UONeT@k8eI| zR+W&l2(%APKFDA5iiXagKi^|^wGzEcDIO9H9>Q(~2Pr6Jl^M%bRB{YXu8?0;{{HRv zPiIL9$^Iq=tX%mu4oGw6&Mlv}p1;hjS-MFwg%Man*g5$HctV*9Q<_n(ln!}pq4FnBj2w8%;xljM657QAIcf`??A6%me z?i6wON-es5dyW6uWpkx!1jn9a(*tW?FO-Dnr0zZtwxYbU+qmchxeO?;2V+0u2G@NR`+ zOb0WwT9}{6$Zd3xIfZk7oJ|BgDO}h}ycN>z6$3107F4p}fNsIfk%)DB7aswSd*YZIsx0l>H8~2E(fA9QAOYG2U~ne{eG*b6-Nv&}e01s> z$(nee5U{U^j)=JDbxq3Mb-+5;BFjby3IuYT9-tzByJ07q<{uA@VA83kanh4O8;-3R zuhAJanrfgR%@qQq&rf+TrTZ!&Go=>?y@E+F(Af)Jrfe( ze25-!R%)c3yZSdN%Wz7L!M1!m%tMMSz6&=Lg3?3& 
z$sp$MQoaadC!9jix+R3?g!lz(41K=eyA&&Utn_D8G*1Nm+&T@{xN@g}ojSMJ2DYxd zwC|II!UBzknT@;kl(~*EH>x<*=9O&OW;$T5>==ulPWrj5A4)=~L<$ z9>rH*N!^!tHAX|r-f!60cJ<>oaW=7Q28WlKp3|bveElRn?*+|7w>7Ttu%Mo2wOq5} zXXfTxds#Od()3ALUn&WLmyP}zy3x_5nGhKn$vkxz1YUyrWtgkGAPr5Y@sL*zZ+lo~ zQyGC=6?;Enc1@PHPH($hl7_3{(U6$J!^bE;^}5NA<+KzPHQySXw!2S(>Xq}H#0;se zI7;c&PjPE^9L~w{;_6D^I z@8MO}cik$_7q=?}cK?(!-n-I2c>`B`^r1!c?o3g6{EW>}{qv)}MqU9HSo?lvYwhvS0D2kC&G#x1%G!?|DYUrI3mFlBlCFb(_53j z9UBqlx;!cR$tK#hA+6<()mYXf5ANRbsx|v~pT82Bb?S?`R^E}T07TYK`4eamXAt3Y z^a}`IYfuJ8G#SDw5Yusb3;%6xE*BqV9-ODouP^}p+cNHKSyWtf^hld}pQJl^-rGmX zwhy$ZDq4}}O?fwDS4oC#NMm%A%DlLNsNPB3ufr;HGQXC#zQs@E{TxD zg@+eJD-9-*lnT}+7ioGQ;Wlpd>9wP*c!@8E!J=xps!KTwjCOQn0*3i@~nN0-+1;*@($E8>NUO!T8S}SaPFj6sGVz=Bx zixJq{t>V_fXc;P=?B#@tQ`vCWEPU>a?)sYOc}Y57aldH1xxM%)%b{|?`WvJyLta{h zp?SH#hEs6tvutxs`z*sSamwMG$}#t6S6Kcv#e&>-uNpBCdI-s4F6rkLDk`!N5~9KA z+w**%%&f7Tw+DoX`y|Oxz7<0>XkNE_x|~L~siOy(I!oiY&#U@1+7*ygO;XgR*f*;W zJI=p(YuePib8|D<*F8;+hUm;p@XwQ9pP9PQsKBa6^SB*#R-OMkm)dU=ztn2X>wa3H zpq1`BS#RcT)Tl@5-6I$5TY~!hI-gIh8x_V;tKqxZGSt?<=KEMTA=b}*%Q$_R+UvtH z5OZWTNsrwQ83@l_YhL-4QZ~JHv-UxSu}-ZVS4ZEz#uXa&l=iuEhsLfOUdRvj4jJF& zaAAG$XfWA6QJE;g!Pbv`1sD6&Eygq7JwE$)uZfA6)($JSQ5d37z!R5WI_iWzaJJo1 zu0m_snDeIEA=8i_c0Ka25MKP%j9hr1Y_C{CYTEHavn#K=rdM5Z%v^Z!;vQ@#d1sV| zMX$ydJi84Gg9Iv7TV?eT17Unb+(Ysyu10=D%>u0)-@2$7cmybdXT&@Y94;ZYsHiuI zIx~5%m6wJ$9r3iwIldcZVQiaEL!>b?x3t9FTUhDA^#-%@Cyi>I2X;!BM7HE-OZ)5n z{fyrnD^OnUtKQ1mqv!vSepq*MeoaM9x|L!fv2{hyyhJ(cciyqOGTEiOd=u}RvZZ&A zKa1V0?XR_RU*-V2RFyuGwO?Q%hLP35A4$46uoBz z>mjs*#LJH`@4=-2jo=)EbK{MIF4}<7U>!+sS15aYXWo-6-l^9Wsg6(AN0?gs> zt6rC;@Pbw(ZthB-LQvOhWP!vZ+Q#GB%pbmzFpIokaK@w)pU0!?l zfJOyr#%pZnm(Ni)3!esa>!wvW>G6+n#QfAbqEh$ro0i-Z-h6D66;<^VJF9T~w69wS z?e)g%<~6hAj!&=1_32ri;hh~yDoPmZ?QeFK|N9p(wfjf}9bb458oWy?`yNg^8?YTM z@+`>uhz!bBBcl__DPFwgB-T4cDbin8)&mW8`b zp1H1*yka7HSN_DP}@X(P4db^#(n$HLl6?S|Q+NSC8k#rdxD7~0@xc~ytbO9Br z8>fL@v+^G7XFL_gcq)xI*|>3oV@a!-_ff~@j=lwj&XNlxj*ZW7p48uGoUn1Y64rLT zxpfd`SrCV^osQA>N%qA<EkbH%3`Ty2k^8l!JJ!OorZ 
zar`qJ0n2vT*ysRp!^NWy7zMnkD7ri5WAuo~drPw%`uuL29NK4?LklKz)phc{KU zx8ze{)DKyPn3m|lFdaa6va*T^jN30A(61#3rtbx!O~0z5q7u%2zIl^Za@f2(HvWc& zhP{i<8%ow$do z5AQ{b{!1xIQfI@`nb1)J069`}ViN!F50^^Wagq&WHtSEa+_lSr93HkXy5=%uROd_P zeDj&KP0w1UiJnXAkvd~&*wbu2?^(qA?FjvDisWdju&TN3*nE2}$JZg$fQsWq^$Ls` zrLI7LaE!2mqY1bi@I`_?ex*T=NXMwJC}3+Vu&_m&PnT_HPAQ{5c)+pYe73?g3_!Zk zy-+12~*x;ep6xB{cUQ9vFi^@=LMKm<~hk$HhD~VR**MlID z3U@$WZD=@e>uV@W{xe3JXL}ctlvO7}FfA*!hE*iaooeSlW+;x8ysEcT>Rl*)Lmx^cDdLSTdJ z#y=bj`*6bGwjqM^%i4Z{>IO3KP@5we@HEkZ#0q9I_KE4dwU^@j65`DO%5!a?%~nr! zq#VC2d|XM@XP`4nh$2UAu7N*=r|E}{VrVoazz0}u1O)xD)K7+QL7rdsdY9cgJe2)y z2PP2XUTv`vodVwn2UJhIbz+-?>sq6)AWB&K0=+Hf86y6o!C8|LN zXww{x_7x25^GhyMjy*5&KLOGQq24)IGWMC*HP!bnCV<-@87$Pgo&mF4uvB160aMi9 z!R=nOtiN8pf6L;~S4VTR2!{s~G*9yxqAB5$w$<7%{CoRD!iAL7B~F%5Dqt&Vq!m}i zC&hmUI3HW)LCW#Cctaehcsw558lJ#N)&FS?e0dhxUyosY+Yi1KJO>gN_OyBM4u>jV z)~Q;6qgTF%weZz*UqMW~3CIVWn0f?d`IoUc2No&J5ov5nImR(-&5o_~>u5ng8L7lP; z)~VAQeWmDEN3co`)Z*`Bf{gUDgAldKyHNt}O{K1a?s2HqaV^4CpQV+OIQS|4?0emVh6QC3va1Y*KDl8_kcSjz{iYh*Em1YmjoA5BD$C)b#S7gXIQGqcdQ)G5ph&`5w+sF3>D4EDkiP2uNHA%q5{x_)A5Y>Xs~ zu+Zs$ph+8EXy2R zUoZ&Da2RDA9;c)HUKRW50o^vv&wF^a+cg5v*CR{Tm z6~cmqa*!W1ygQ5r=bQ*AaHdR_L)RxGqmsS}JtYEf2@N6S6pOF9E~5(-eBf%fQEdUi^_fFy^JGbq1m@3(AvAUAFw+f+I9yRviB!T6}X zx)mPAAb73vRy)l`L1dt&V^K7M>lc5j=m*T^Bn5HX6dn(5=H-yH_7G%0e+InPVfg@) zD9n-q$oJQ})w_d_v|2z*;P zzMt$qCur1oA8hRAi=*`ZC^*0_8rsCUdl*Qm?cs-WVBCXo|3!C4sb0kW_I=^*eC-e77g z2@vh?zjv5FPQ^o9FF6NL1N;f2B)5u;j0~7o;z_@h7K03>z(Hg*Re!7tTl+XjA%bK2 z@&=MHX&;w!;NioEyvUs>;@sh{PfokgA@wJ@@b)99@q*0~LJ%|?Ork98>`cpV_#UHQ zN_)FmMn(*dfT+$F)alEp3j~dK)aOeHF$KT0#9pgbG;kjXb1Gr32+EK_7gajuGQ>M? 
z4E4+QnQ#u~Dc{jrG1$i}20)A#Ni|Lue?z%BDGFl+3KyTwi~uHlq%KtA<=bTsCf zIMmmGvdL{&(+m13)3Xaf0DZj@tG-uLh z?XYXZHt&kc4T+9!#2aZx@T?Ka2u_7#eZCBTq%v?iI+~K^D@Z$)%a+(@m!}>R?&47; zLMrx7nt{gqEBv3MHnO41kz)hPR#q)1=h_7J$hjPJ@L-X&)p%B5Xc+TaKsOcWwGFkR zyu7D&y0nn>wxO-Cat%aqA+b#msUlFS@SUYp9nB;wjxPSqpgXFe3_jA|P*?ZqOvi{37q^<0;0^eX0dy{p5cd7(eeGuQ5({7!SzWwm3f*`c#HJnT@XvZ zTT00hfb8(tfYZk@gKIXlUxZ$>3-(zwgE6T`@W@UUEZK_6(fhnh7|7{4H71Qq+gDQD zc!L>T!kZ5l;Tya!(qFE-2?-6(&iomXQjgwZcQ8v(lcH5P=pb--`zfwc@3FJ@5wz>3 z2D*}ZG3xNdt>l^1ZNp`0*mls2BT}RifmiT$-(z{*FQi^&0Y-WhBqodz5QynZ3f7@3*&iekp#ymzGz0GIL;ge3pR;G@aMnB{%$l|A z50y-s&2*& z-dw3)?V7Z!SFa*uVjPBeiD+vaK75OCSdfy*y=8u6(LExYAg6lcYbSkC<2+&d@)uHn z*fV%(w30==kh85;QF(nqtflz26wDF!<}IjESZRMZIbioT4>w_k57oF&zXLpokO6Bl zxi8-IOz#S6Gy(7Pd0GlpeJ#I}^fnfhg7m-v%V|FY{Jn<2^~oNc9UzV!Y^Zd3e7)~9 z@5X$=1W%#WO0WNxlf0mOzWuG}bISyoJBp(lqD-EL?#nKh<9hFw`q?{5dQ2CM_Gf z^SfzIvS!7Cao8m}$1-n5TDNF8>syJ6*Nm!&f&F4uYB#bXoRtk6{F za@7NR1#4CNELUC4(fOO#x$3l-mSo@8Mga>$be4_|E7av3WGQJG3}qe#uV%`{*BcLS zmqf%JysxCzr|-3!65T3(53%NKS)OCn0^yfO(DwwT^^$s9)vSq@BEbmbfDtC*Dh_wo zr`x6xn^0s#Y*nv`L`o>w#I3tC&Wd^jnq-ul4liawtfHncvUKiIskW+ic+l||h5t`sqkrtW|Zd);a z!w%g)E3I|usjW(zqY@I@$xTYi+6MA55wZ?toL?zouIHv0@dk&6jN!JLeg=lTH*)b* zaz%N1n1#kL{os90hPPMG;P;7_4AhM0c!n?12pYV$UCnrTe^orz-wiHETzR)$9bauW zOFF*0t|OQl`0j^9GY>}W=_ssyIi`5N!KT;adsG>2jP1pztNT_T+t%UX*#E-YZp@88 ztVw3+d;=m%=4`TdXLF9q*qep}9Yb}fey!u_W&WDh@VNL=?$_9TpI;IqC+R5(?96saAS*`XHM`|2hK$2E{5&bd9l^HAF& zhwdvHsrqd31nrDgXQJO*V?iz~2&nEp`PCZdGfElD6Yn^bz5+$AJ|ocrur5ed+YLoQ zvGHNYEAEi0u6*GpW4d6?4gc_t8F-_e+|FUyvGJ>!NhrOjH0#bNm1isTf8CO{xh~cq zs3V2^;BE!~Vim||J>f66zt)T6y&}nP7|hV- zdX0>$i(T_#*^+GSx3em(j1+WZkBg=7{xH{E!&SCCHFM)C`d)|51tq2oV|(3W;2;v! zf80(lP>`Ewyk{Xn+$-ADuKxqwH=baBilgIKn4YVblTg_8vrQ|1Exi)Zjll))aoqsx z{Mdo@2cM^Vfhf6D+aTyMn`fOq(^z?BAMIAao(YO`cU;ksMBwpQ;K{2rDlk@ZHx~}Y zxScuD*I3T89vN{A+PLd2v6E?4!_whAbN%gc!Cj49VL@P`(Cu$@ZYA!7qVfoK>Z zI(moN?g2VG7Pv7k1c(#lPXtu_mO=2@t#N1ER;gf0g(MMB%!KWNtY+VT+Agefe*BH! 
z3xpH1HtW%xYZn)kFa7chQGN|@q#x8iMc?5TNn{TO0>E$l(C#7AYXVWrB6Mm9tDm1f()Zm55JFQ@a#hLx7<~n3A%-kVz>vwp&9aUF%eN77y3M=X?Yg zY#QtusN+MaytD!YS@jhSRx6CXGO!DBULrhOcBTW-Lbpl&i2)l3WgCvnwG(LT;;BFn zEd$458Ddph|MSQPPVNiV91MKq(S3TB#x(n^c<*7&EDf2npEF#h{dZ?DAo6(o%vpo; z$~_*3Hiu&p_k)x1Pj3P+5?-+xLFk2XFKBex)MyWaqK6(70d(^{YQnukIoBX}_l4fB ztaPhMCtL>e&lDNkEl*828e0^=xyVWKj*=8O#e#G4@`zgB2}d?xM=KOh@rFoDnqZuq zy$@>$OZ8&jNOot4-4W+)R|CqsFS4FK<7()u2EEAJ6K%IovRmfe{8nJ;MD{#V$5Q1y z?uKv@SHdxYK4}kOKaqNH!hRwyz-m2q#~2MKy4PCa#BNsN+g_q{nl^o64>7@m@_xdL1a#QZWv%0wxu zk#MF}0Tag!xrwGrdiD4UWK;HqM>8=`!7juY5UF{LTW5-h<}i^PUlaiHjNK1EI8%-j zgbdIn*Mgw}`C8-eb)1=ekKXHzp#>@F)zcf{@XtZ$T|ZgJmMG#2-`ng`7xW@r>m>V$ z<(pIj*(b3(gJ85xn86VJGFGA{b|84_Az%JJz6dYS_3VP>_1SeoF*BlB^ zg3OfEs>Bw}nLWEK+yjO;pnp}xXr#iP1^MIp0fL*l91nP&M#~uEmG-d48{lG19t}Pl zGVi^=a>5@{eiEZ=IvMv7aI1sQKKa1rm_Z3?;{=<|1&99+c8)X~h^uZRP;dfASZ4fz zO_tf>6b*r~_dpzgxDU3y{Ohq`qxieq-d}I4;9vHoe@#M9oRWXD*8FR2{mUu$uafas z$@p`}hClq7F8_R^g)4hlSAT+R2OURcT~7WEYl#N|(C zWd3mLD&c&@TaVkynh$c>fei$L=N%ve;0KP=kUjX;vF9nh*N?cxE?f2k5^r!>oQWtz z0$0BMu+1qfYNeG|shn?FMJnVN1WntZI1|A?a0!GPFH}cV!X~hz%B{7ipvV<8lAc8* zZFn8qe1dx-iOpx=dQEhmJ8~$W!}Nc{d)XF{lbEN5=%%R)R0DCjeSUy~j3z!Vk=8-E zGg?6;C$9C%rhVWa0kDSiE7v$ET)i|Iq)w#H`IJX2=lT05$~)?T4W4nsGP@PLdQRLY zqM7={g&$$1Xj1Kl+bY-#$ncF;jtiuofv=wnZuGdGzi!OwL@FkvRqlyDB@zsF4Yp1bx`_|F$%8k7-U1))RY45; z*pUpa>aT6D%j)+HSGFrRug#t_)WxFPiZB}G$@fO4#ri!fJCgzeKmSta zjD;Hph-A-dU;2TMc-$S4E8wXi$(IOPZGGAPBf9!UqA9h=2I`zL((F&L1Pczm_{WH^ zbqQ%sdKqaV09US1Av2YRe6Y=A!lMEDfpm-PQO{w}O>+_9Qm}1jtrORaC}@ zt9!@l;iY?h!mO!*zgV*P(ZZ2j{OD&l%sZ{wqftcx@_w99*aDAq)s@m~9&m0vOE!!~ z%cE$xK{MUS;)a#>@QqguX}kOj_YAudZmkeWG|XGO-EITsg&UeS@gZp2fYcD!nczzo zwe-i<=zR(B^2K`z2&aU-O;D5!X8C)H#$~92-gE|;&I_+8p4Cepv*DIf1BW|U<03GA z7wVIBf`ksSFULp>cqZMoE#wDf@{YG0?LQ*ddQEiQQQ@L{cMHeUSFZjJS+s)0urBaU zt)>Rtu1`#?iHwN&6;_(oM0=3NHtE(hNlo#vrum3|VfEV`=NH=*=$7Vw=ePd&+JeDn zuZzuuNZfK}??Y)}Iy_0dLYp&Z4xE^ygG8t&OWwOqu{PUkf`q9w+37L2Z0RcUt9?y# 
zWwl7`wk}p;;EisEM8tS7;TQ~M<}UfwB0Y&UB+zm79yx<-xxAYda_3YFbY2o*7gj)9zo(QTcSfL(*10_!u`Ynj)0*HvticU;HTL!sUx7!0G z?ylDMlMQTPUG5fr;l`ER##c2;?#rn42lv)>Ygshqc@UM3AUvHW%sj`@Ap1=gf($FR z;F0Fz7i z%kQLez)dfS;688u9MAck%)w1$Tkk{r<4h!_B!JCPQWg!@Ar_pKmlI&hKt} zuFB1s@YpH{=(ybikFA^H`nv~m&KZtY7;LdiTTZ(;!dbdJo4&ka%0{xR&YP1-4NLtN zu6cdsQtGcgKAW{iTs;Jt=9G~R&#$8hguRNJ(?OCObsg@kvbc}kcPn`H#8ZaXoSNt% zCY)psP5D<64~_>gP-&+`Qmt?$WwQjXhW(4?L z#a?^0=4^FvY<*32d1&dC&R=GlTTYK4tVo3G%d;PFme*c2ZqJBq)N0i7dEF3GaP+%J zx-$(l`}-B=ddd%%N%RItozH7LpDgd7snCzQJi8kdbJz9W3Bu||ZP`LC7Z(X=4B_(* zfEm=0xnM@3;HQ8sJ3Z%0JaoKV7Hljiw29_?X`6FA-zK)hsjdCc(DK!;yZhHC#8nVT zc@u1MM9$S`rmd#{d{uHUNeTTbdX89#y!d}ND;X~#Vq=cXB-bhYD|7}trE11N5qsD1 zmk$=%-sQW5Td6qL6Z@ zJC5f1^oEm))@8y;tJ5yCd);+K(=V0-&Nl3?j}dMtv-0h47{5~5$7s7c_?9wYVb8dF zeB3(0zqlcj1j<0+um=k`c5vLfOH*v@!gxQsKf7hsa4|r>+=|xs)JXpGf-Rj64Pl1i zHZ*fm_$T41TUaC3Fyu0YBhvS4oo%vPM5+XDY|Z{j9{$taB_e(O1OLtu`12J8D~u-< zjM1_rna=w1Y%9bxM_DLpD~rmgj%!#Nq|HL{D1b;WOUc6TU@z#_SzrBYas;on(rO1dI@eDb z`dL0&sR@r4_5Kq3X2~-{bugtsjdN_(F7PwxGb(SXIQy?Z{~NukDbG6BbCku_^?1i{ zr9jTxeb93JMQ}4^?!wEUC0&c0Ph+Z7I4ud`vE~fKwGt*-7u1jOG#Ne5GkpN&Yj~)# z*10?`8~3rx^LEn=%-;7pq-rNvMQ%@X$z*bC=$XZ|qYspZ_vCwod7Yddqq8(byfoe* z4MZ=IUE%b=g`885Q_L?=b-g5~?2GUHn^l%1^I+kl7=BvBphs=W;o`|YoQKOyf8FRa z7@O;S$t$kX3hr5yq6L)kvS^!FkjEBbvgC{`sLxc7PEn+0bk|paos)DChFJwpuRA4W z_Ilf|t(-taMyC*gPjy-X;~*j~s_VJ_*m{yAw0~WcvWBNhd1}yoQ-+H=R?CLLi~76C z$Awv6fOt`Hy{ctZKvUW#EDOhHWrXJ2&aUrlm+m}I`ACA~Sj3>b(kSvyQX(w)KS-V2OEJt(keDqR8s9?zc?Ai+h3U$Q&(h>wCO9o%e2GCz@pTj zpxVGitD~QqVWn%RW=zm2tdua56`lB=bp-`N>l4<#?)=)G)@f4-`@Qv&ensV#s)9Q9 zikS4Df!Cjho(O4Te;AwPr`UAAUZ0@!sD|X4JMt$}0$1(z>hRc6Q&MLSKGzHhImhqY zw|q6F;@#~9pK%rDMb(sD_fVr2@CUAuli1m2s_8hGz_c-^VMdFYo+cM=VlmiEVQg0D zQgPa9UC!X(;Ax|7Y(87Ges}@9!b)SVM!kGubBgz1?6Vv!&~D8%k&D;)fFv{gXLc0N zidE66$FBs{=FJ@c7{akoXhE=Txa*d-)1qq#x=kynpelUI>(w^{?Z|dj3mQ>4rWJfhMaic&nR^nB=f^T!wxkrR zRpv+5Slv1B+-+T==D)ML{;)TjUuK)=j}5Ur3o|`i#PJymtk$I;?6~c=J9~F=M-baD zy1;P&}O7GnB|Bo@&gHTd$D#kycr)vr(?{xo|v!~hPx%(xT 
zjpy^zns@0PU{=>E43V4!nHFbKKqY0p>ZBrmjX7bD)iEUkBcbISpy}tB(ya7V{*8dq z%Dl}RN8AqzG7tP28K>r*Q>T&yVnLHCtvQTJrN;RW*`KK?<20x5P8W_z?$&*nJJcVh z;}>K@Pzw~^=gjnn?JrFbZCalmM@_IOb_`ClirF~M+m2wni4?v68cOzuog@VYP7Ss} z?S<81UBuy5=x~bVrgVZLMpF zGPR5Gv$oJvG#kI~mZ$c+>D>Ed$aVS?$!XPCzZ1vN3C~B%G1PIhK^Y_K@X^R1C3jvq zugi_XV|}PGQc4h!OsM&X)6~cQl=oggCOSxe80`;?-21~|eqa#Of6S>5^27bpGxU37 z(EczJKTPmqESU&i+=wH)%!h;Tf4#1^;f}HD;`Q*7NYBn^r`#va!Cl}_?@#aWrL&(1 zhY$~0$NrbA(>q>!)ikm;tUyRm{6K$E(QFDX3odSdb{>C62xJPCO_uCY^_QG@GGM(X zCYMohQ;MH*flAgMY4&DF#`+7hH9#z~v5Eg~2~7EE$h1Lc^m$UuJn=_Q8$_cZIi`is z=ECJ5Ry4muQ`kq?%WF0tdcYp%(?G^;l=hP~TaMI$&&MES?D%9FSTMRsz4sabacj?= zLsxWT@%10OfQcEg@BYsRcq|;+2+MVt=b(cIw$MZQ87o$-h=t|`3L(yVNZxY|+Q0tz zF%edio)BU*|7#HP?GhhogfCD+f~us17!N|FF4d<#{E_`+346fu#!hZpde(6sQyCr~ z9+b&T!$qr7!6fOoI5YSW zef*qLZ51ow=hf(m;C+$tvWDjI{N^HUZIOz-@Vr=}u-^T$Znx9NuZOw*h_nMCHFmwn zXfG*96yuZ_v(7C_cY(vWYdQ1>cQmU=M2W8#A{}17GW#0}oi{)1B{Si?)CMEX9Uwr)Q z8er)+!2JidSu5=K;WZ zB@FjV#{1El&oMB?G4W%|_hd%GMMT#&VLcHlS|sK45tfw;(lRpKLDU3eTx2i;X_|Dgf2{CYg8hjit>j%+whqxy`e$x~5ax8Z3%0(wREbL3s z(Z_HgnmqmMLoZIvvxBA>uBJ2thK*E1F^UkcRgRUH!LshuAFZXp6RW58dt|Hu&Vg#@G_`qAK#jgHMZA${amp+sKJaDBTFbBFM3{`p?yexG0*r&I z)kr8h7MOAU`gH`q5UZUe`nT5cJC3Tme_|&FC}+3#40A8QSX}+$v1yvoHG3?_dG@by zm@{rKrJP|ufnJBkCi??%1Xp{_`UsuVy!3!G5>B{jM`tIz0`D)hYy~?SMsch=X6-91 z*jamCSM8b6n%Q#QwfD4qmYD^xJy(WB-k!)50EK< zo?u@%20npBB2aNw_>cyW;XL9T3oJVd_J#6F3V-v9Rkf$v=zZez>hUMIl(oOtMnd%z zyRS&ps@*>(oR-}ozU4&^AT8{*%ZMK-WHGJipYZbJ1?5578h z<3lrnpJk(5*dD3f>J)$YktvxM4!6R`bj7Ct{wK@GH#35QMEN1e6?_aN9bzVY(w!?c zOslfxPcyA8$#q}_93cNqXGfwLYy)d5D#BCkDe}4qF_nc^9kw>^f@SJdV3@hE#v+Ol z)I@%+#gI7Tp|cXK3(2tU6Lxy^7dPC7j)-?Z)Nm8n>Al-Wp_1U#WyOc+uzMEihesC` zZR@CvQKbIJaRni{_RV|mxFGybA&=HU$WhZ^Kawe%C8-bAv`O9PeNnZ5E+_-7ts`7Rhv79uqj|~xMUd|;0y{$7;o1Q zaU_d&vm}Oe=Qj?V_ibv1xPnT5MEe%Qwz56A}ZBbz*&4t-Z-R8-UEse1~% z4uVGlyF#dViZElKFUWu;Bq2tSQSf|z^98P12lylgv*K6z;e2R`Dwgg_bjQ&o=S+Yn z0&nsE*aO#waWnJ>V%7&$vT1|R{nHm}W!!*M3bsmN6%wN>8DmiZ7W|w3H@99piuw8OPW9 zjTfM}DF1Gr 
z_+gC|a5knIDupA*5-nQ{Sg(lw1Mi!DX@$aHMEgl!mVcdEejUbL!p zJy!4KH>@}0|EtZO&R~AEnT!zQxON&WSTn3?)fn*~cr0iYWkeG1D5yKm07UkF@5y3?#sREgy7n*q`r2mh-H;<<}@88E~ zrm3cBF|DSJN}+@)a>zENl7vA>L>r~4e&)44d#dt;d z)NvP3W(VyR?(Y2)Mw9Mnath|QLUtq_uwvte4ROPQ9OhrYh>@H^k=|eB^E2Me3A_pq z3gp;a3{*`;ycue1U5)|KFdi~MQviuejjLeqs^P>uGJo}o7 zLsIBIsO3IV4igv7iRtquo1LVcJ@X^PS@;_6D7BkuBWU-NT)+OIs!IOcuHNxJ-qTHP zJ^lJ?QAep>SyqV{jg0!8N<<=Ilo+=UcUC_N+VQf8E4(j+^+YnYYsb>XU(?mIvxF?t z#ZMbPU-!_6oNfQ|@?v7xKpUonPBJ4)J9|5n4uk0k=Z^ImM=_P|#is6V#8_B~DZ=s7 z^*$y`|Mg+;>K8?$zPL$eU`i3b_+DYyVn6tBeCTznf2kk(C|MX%w^p$2@^>?dd9D-B z`LznaH{2kt8$PUR%?A9CU0QE?lMnLUeOXsrZ^@z2imEh{2eIGen~xZ?X)6<};*HQd z`yBSJ4lvMFTx10oZe{{nEn%)r!uZvw1|SQQ?V!r(4_QlZ6VSR%9LReKk2_kGxonL0 z6&tS%XqZAr)~3cZX~n?`U7`olos;X1mdTrIX!(9<%nb?*9ASFcPX6r|ktw4%HO;e& z@oc5JoS3HQy}`uD0@T=$nl+#0riR9W0Qk#TeZKaMFKWUt!Bez>lX~j?Gc0T2>B^ij zHoC9-u8>`u&|ke$%6zYy+9ky}9C*M$==Vae*p7g&g8MI%Xz|7wkC2{1Mwd)>&mnYt zr-6g?U~Ha=rDd$*vSi?6L|1G>c5P1lsww>m!f+YAgDMy1Jj9>R}=^GdRUUsu^~Xe?&X&>_cj`>_0)qY<^YC2b&||0%!msi*6LjNf3MVPxbQ z7im!^>CsoQs8xaGftZG;*ZezQXBn5)REa=Bbmn-Ku`jUeHYrwg; zUQ1{4d-r?V*y+Kpntq}mScs~yM(Y6~g_AyxJkHJ)W7pC5d2g8CV7&Z4Jtrm;nZys( z^)S!F@xZJ?CQ!@hp!BUn$M|!}QdM&rbd!;E4V$G?m#>My#L48A`e8xAiyj7#d+tW7KbCX@U z$0vqOjk>T0*2yce%E?XqgZ_Vyn0oVOJc8Sv<~?ES!Or*9Tuli{v1yBDwiALXEY2%q zG_3B1d#u8rbj@ANokdGg(mb8cKhRdzgv(~AEeJ>G-!^{aWzLXi0XK8#_5?&Q&R{{WO~g7bQR z=`Y`A_uv`4PUd^1@?2qLmx@LrszeKddOCO#fqKLxNgaZ88|; z*^|}!N3TT%jW@{P557|Jf z6MsNXlJoG*G(Ni6Ow0lMb~JI+z0ZS|-LD_E)t=eUJYinRDJ$(M$jf~{%2N7h${HV_ zNn|pwV*GS5Z#Y?)Q&n(v!>Pf8$~gyn)C%goSUn^8ECJVn90L;=C7RD znM+qEG%X1;9LGN@C$g*bK9^F}(y}l-nHCrrh#Dv*GFuJy{G4$X%^lv4r~-!s7tH%l z7s7%|FT@hFjwmZErKiw2RYdv-M~Gb3*hdT|R;m^#&Pqt<6v%T&OJ)k3ZN!9}%#qIO zw^2)LSmXUJ%z$*%%kCdN1mCKhNyeuq*QJ-4sA_I!viIc_IYeklWx3P(=*=%hCG%l7 zFytkX^%lQaqTIXP=_kg5F<2H*#yYvIJ3FHX$rfS#^}VhJgwU;yS#MObRjAxl_&wL` z3G$|6l50f7hp)7K&l`{}KL}K(8+aTIMg^d7k{x?ZT%_-&r=JB33OhQOneJi&tNr&@ z?0kOhMB4b{v5&7?XLBzJ%s-DZr2*RUZhOYsWb6=A?nt#on0??_uh5#2@3PTLFto-F 
zHjNiGF-(%$n5Ihyg9cc!8b}kKL66gyj$}J;W^>+n%mOpFbi9#@7M-w#Il{bdF4vw|}Lps3Y_kX+aHf@)Wi9uDOsD znKvwGK1~Xw?U&;+#c&W3J?M*M&`lu{;rfPh?&8Ha4lThJN?cAqlU-u6O;Z!*_`$BE zfhfVTyz#UJ^Y3&6`%5&=U}987*G|Um>9qM_t9YWRcQ+7_;XZN}131t0HYheHMz4@Q z{u^2lkVURDRpcF8%8XBWFOqz{A8BFkPhTptUzQtQajvr5BAT|7w%5p{aNORAZ@jNG z;&Ef7dJ$p#^dp*1o?xISu=rJ``GwWpJ9gyWM+rfI9l(B2+wR%3r)x3eHsO#$Ut(*y zCUT*$!K@5vH`FkTxsMXuojxuvoAoFC!+fB{U?=xRC>`wSM|jaus)d>9eGmy4<70*U zo#8bIq^Q>MEC|rQL0u?GUIkm2^res~Eal$E-99ch*<&-8rq}kQvo7Duv3(WHWl>pA zFc*<+5pwBb&LLyPi=vn;^l8VCsF<{QwlYrd)1dlp%nGEVXmwNdb*+>rW$S)a)@J`* zRH96UQPaMMFx;?d$Z{I$TSNSV=|X?){QkRm+)9iZfr$&B((oZtDPMfOoRVra~zrQardhhyoG*f}ko*S?}_+5j0`rSmx zx%Q_`MspxkWf}th*n9;Rq|rn83-jkYE<AI91>#9mTAXBQJj(bvTcyC*}BS1nA1h z^CMs#RSySjO(-MKJA>Av)DNb{PM80xJ%u9Y>{WV*1}Wp5Uf;*MtRAK;E@zdT^!j8= z-DIxr24cY?-M1270w~K6Ur{3|!h^1`L6LTz`@=4ZX`j|HM#{&!)1~>U>}t^WXx!;4 zyZ9#oJ`hj=Z@t$<{tUJDUrS>D43ksn0DT2qKRzYADlteK-B;N>nryGh=EYpK41a9e zPfN{v-}x_BeYO`Q^U0WIKuo^9zUk-^3FG}3aYdcc4Kx`3M6}_TlISVYAzVPhlmDvS zFHZa;Is;{g$Pc{1j;d~I6e(gd$nD#=aox(ni906Tc~AH>q8diCLw6Y>(f{!j?z3ns zCE4yq;4}osKYC)X-y1g24ZL=3@scH);R|1PSdJjA2m04h z&FQjeZvKRuet}!U?;t@LpwBb>iQiDihy+^DqB61)^H) z?(0h_Aqq}RY$2fCBBjJbIlg^xc&OfF`rp+l?=1rU_-x<5q|#CEKSH_$V3ZE31|~|c z0fl#Gs-QuMz`)QJ$(#EE-Sl7fEdTx{8z>y2SnN;CA(CHge=Oye-nk?&hXn3HSZQH; zRM%>=zhQ)l6JU1evz@f5g`{Q@kA5vqLkd;`1#N*;|FhYWxJG>y7YVkZj1~A@|LpzXop@Cs_#!eIJ|}Yr;DoKSB%HYXTY*kKheak z2&WLVMz!(jVlJnY>rD&@-U`tujBPL+XW|X!|I;f3=aVhb8ycaoiXSfAm|#^8KEXwL zpP^w5yqUZ%UuNhW;=^2-zwL3{?Aj%we}f0nVs5mJ&UM%3^@1Bm4-2i?s&98qHkN)z zfA4r_mpf^GneA0|UEuK9R-uf~PHtmb0OEBPI3m0vC?7{uz0d+6?ZvQ(g42z+<8Y7v z1tnM#W>IJhO;m5bH=TWrVN1-`a=nqt_mgEj@otg;AL&try9A9}aB^Cl= zipm@}7ngyxa5wvel;jfPAaFO#j6rBZ#)Qv5Zn<$Hhn3wMJ67U#8}bHy0HlvQ6Osg_ z)7vgyHZv28f!rwNpOQzX zMVui%Jslll&>T|cvCCo!ZI}?IxNJ`XAMOq{VrbTA1PJ<1JHw|2+X!A%wl0=8e;g~t z*T)<$%^JSFulW;`llSFx3>O&W98uo4V~0owa*JL%OfARs%@FlK0c?fecbEIR`DK?L zkB@XBGd=N7nR~zJaJOJwaxxN0WuS{`u4NBg*>=F!T?Ls?iI|%avkeLbMq>UXb{Wrl z>1Y**Jb=8FCd=+_uQqY9#uAH-*D3EL@^-wx{iyAlH6uszSuc&!Y*MQI{9oe~B6gTY 
zz<8FIQ_mhK5KDaPm8UlQQ}}`FBjKtI8XU>{i%WDUz$!qr5W2lIw}kZcbR=1zZ3*n# zQ_~6TmOR2T!!qL9H8Vc99LeF{;)0#8J{sYuql@tnJhpILi4k8~gf%uSwn(@_@g;!P zF$2arpNb?xYq)69qRxIqc5u6$9KcB!~9e@tv( zc8LZ?uUnWr4KjA zIScf7*D#e0b6qa3``XStwKk+DPViM+NrP735`Q^(Df_Ad>zP+6t?RNnT5d{JoVE*O zNN2;%{A4gMCpO5CUFO)H}U zL*X`RmEg$Ns)PfQRg_yPw_a`wG4HTu9o-yF->c$cf;p^$g~Swh=cX$pOkYRla$?jc zDnG=IZbKUzGD6-AlfM&~kk=>Bk82KE%9qB+df?<88XAhY`Ig0o#1x|xh50nmi`u=K z>`y&_Q3V*w$k02APm6w$AAKVt(D9#z{6HUURcft`)Pxt}UOcrRoqM?Qaj(0qOyARw z%}djjnyM%}})SjV_^wzZz-mVDPJT5+EFLj8do$s^ANxgQdwN``4%?U(36RF}wXqmmW3 zxSCi@;CTKqb-ux2%X18jB=#g`k2q^m25mfy_US zh15@-AvwR$QjV8JLihGZbW3Rj4bhIh#vEP%xLB|7d^{PMtY2vPL1tL=4I4MEV7NnS z*n><4Uj@btTaM-ZL=1d*tRj#CBPlADVW*g{eIF(Zd76Db(8v3oRURYoyN4nhrriS6v0#*X#&c7-D0XCu+5 zjQNd8Bqmf4;Bhf!+Rm)4HZ8F_ahaOZk}!G41QeK1J{;{B-FXvr3iFu1MRczqzkk+! zH0ukDFLXdu!;WxZQ3b90-+RBTEwD%|ec_gs&3XRt1a~w%!mZg`s%YAGwAlDHwCv!q z(dW(g90E#9?{;sDpgnYbOQ1f##mr7aL{CFRXA^CoMCs`xoKxRi0Cc*f*13B(RA#(0G z`U^R`7P%d!z&^tX2@r@cW&LstphWGNVp$Hphw#8?i=z50Cu#ch_R(;0fVfFS>WD(1 zmgY`_OxT60|J!e`qg45D>{ntFz}ujNa6mwzA+*c;?PL3UuD847&#Yw;SqD=V^P&m~ z9*f}eAPd~c`Mt17k_1OoSS%x{BeYyZCKnK-1!sWf52xX-=_1dEB>bwF1)=q*Ko)xz zKL2pk5R;GyS8%d};~rXdiHZu>CUZ`s>e(4|;BV zxD-4r8=zIcUn%>6Ys*nBHs9zgU zKcoD^G!c4GkQQzOtq$cIZdQm5(#otqR}v@&<;<`>|R zFVK0anxW6k3&O|WLxki7s8YcxzXIc&p;Q63lF7;Wu-5S!ZdIC3)!n8)Deacx>ommSG(#em-V!8}@(`lZ$?WRDtJh;Bb` zu?h(NlWifaOB9^9wWD@1-6}A#3$t*^<3c&#^AMNFITQ4u=y35I%4rXJS7X|tk4EP_ z=yji3)P!`1dDo33{Mmhwe**>(rb(D&z3r>TxPlG6SDT1l3}o+mPyzLh|ET&&=DC|E z^Mv8#|J^=^tUEgC{A~W{kG05C;fOLmTO$1?vO)+ITFet5V#c`jjAlSoi^e^U@eqnN z1-@Uu4i>mrAsx)BQdZ2@zA)i=y68)7gM^Fb z?EVmT8vQn?gzAXP$|J&%LY8u3KS-4-BoXuB_i0SwQDM~}ja-$O$b7Xr%=O_3zVnz_ zs1B+Y8a8+pb>I414Kd+Q67{7nLIn`THOd79{Dm@#0Iw93q13?4K_c@f1abhRUH!OZ zE*kiFnwr&LJQf}VMBG9+;f-tmr2?9V${EUyd~R9yfGoR{1SGEOZLLn=^-| zcM?!7bn5BdskIFC!@%;9Qa?EK2Zyv|$&&E3T0FpTa$jt;ni+yQ#2feuH)ZTjuq0*> z0c7UBR^j8-Qe#0A2;^kW!nGHRUQ@crLj zH(#}v4A*iTituCtl;QV8$pvS1g%-v28#cVd0RwIgB(D3`k38+yMNt)=&H-CciWf}e 
zhsA&9EP2nHqRfjl7Xt_h&~P#Sa#;I&k|kjEqp!S!x7x?KjrJ3AT}RsJL4Exyuo_)c z`q9`ABsGCI$P?d@lp-K3BM3OCiq{cZod_}tapVx7c8Ef3GDpAs71`1XvKK;X3!C}D z>#Wl_wO&iaq3jDVZa|TSGA3X$2T9>W8WKk+kZl1a5egDiAj*!(AcgY%t-)OktyKD} z%AXkf`c)yS|04YjtK(jx$=UbkiRXmVypbQBVdsqg1c)F;Q$;5Hm)ovS;>;g;PG;`$ zlY9^kCJw!PV9ubs^Z|T|Jw=fN08W@k`%KK@MT}U=fiGaqQ}WaQ>YBJwhLZ|51?g-_ zxS~|4jOk%{dcB-1`_L_M!Y)Rk6Ajzzu;)3d}u_b=DSN44W%t_gdl% zYfqoTgKx$l@#;7|v5YswI)mZi=?fS0kTIdiPqTl&Y7=T8SJ7PDx)C0hV%vtzptsP)1a_+V7$@!$u&ZS3pk~lRLCXiU%c5 zZUfB0ykH<=kI|Tfbi4~$0FMTZfm;NjZkpV~IXuIprV;SvN@$%y_J23;5Nd^kxIdAV zrJk(O!T4Y5xEBvLn_sqxXFC6N5$Y+~lxW(#e;{SgxVt{cgXSB=+yo->156kV;x+0` zb|-allN<#bwe_8rzuNZg(+i7SR2>f*yi6T>w8kMI#p{T{#=LinzhAI>)vu!8-ext( zi)OQY_Vlhhw?W+3_a}c>_m)sOdZz!k+OtlV$g_H#B_x?8YmW&_`OisH%Oi`d5fKrQ zmzO8IEQA6)d-#KwdVpvdngRv}&bSS%T}`X;=FC{Nks0F1AbI-IEAq(Z43Z+#=oBOE z2Op04E{d)YE!dC#jk+J5^9QHpCuT?(>XI_}4x$G_2U8ExMbUEQ7-n}tQ!j{R-{9DK z|Ncg|fpdQJle;L>epqjrjBmaIUk39Y> z<@68iVogRGO;kk00_TPxt&yrXkg3a+DF9BDP4V535F%-5j}r0bm`gF2|NbD4+~7xF zvHjCS4&;%a`q9_OGDDWKHLeg3KW*bqO8-pWpLbY8YrY5jKZW5&59Ti>$WCx2bVO}q zu9EC+1>{a0ykg5Bh2T}s;eX@9d(~y^VsB2gDrL40X9<2856gRr<%iZYLzI1qKb*9X zEpD0*f3*|UfSyWkY!M@Guf-LJWYZ}1WCjX_2h*{PQ+imqBYJy#4-5=Eu(yU=B~@`~ z|HC6MP22`6f!eo43Cv!y@i_b@>rUj6m#m?E`-2y!WhJ|KHG{N;w*XYmk`-+6KY5>T z;N12TLzP>)Dt)JG+5#OXd@;8y+ zPwoT$Ky*<6Gy%jriqFKRAHIueUzaKyWT1^a%K5e}Dh-^77Mmb_3d8*atgiP3~04gveMQaCPNirlr*8 z%>%bqW!wh8WF4z>hfww6YVU|zGKDuknR>SGh*N;Si$()|4hhXZM6t{K!#^ep68UOX z>eCBk>QsJK#(kWFdhh|;8hHy?v@x_-ZFTH%CH8@|sF>J^r+-ezzw!5ck(?~;CLKBX zy+8_y<)j{YdD)j^o)aQ2By^OX1D(nC?Bi=iMBImJEMWL@07?=a9i4%)i`c=F-4iDr z?sJAR(JR^q8g}t|9R?b`)igD$R23QhUUkmR#o>=4)YT@xNou;ZTGMd!fZBr+bVJ)1 z+P*7P6*0Mk3--1!<{1E}b`>@eIrsR|vxJBv70uhU?S6EexSgkXv5pr$X6(PrY2o7q z*RCCuq?(8l@^%w5tOQE)ypobC&p$C}%owC^wEvt`tOzM)uBY9Ji76Bb0vZDpPNJ6@ zT~tzn*)9Xnq=@o8fUxC_Dt9wtvgo!+MbBb$b^zT+a^1RSB&Pl$J8)Xio+IK`AtWNw zs^x01f<5=wfK@anRpNq}25bzb?eN~t$<#ry&4*op5IO7L>peGdxw5A_NS+UReo_h23kQ% z@JMOVR9?KOg|nmzkPc&MdR?dl3UR0YmiDM`an=G&olA{Rtw2(0e4 
z#;a%AcM@t3H|a*)$LIs#5G_wGy^2nOJszIlO+JCBDazoO=N(j0I^WL!9j0;+f&smdWU@LFX&nm{7`hG5`IL=QtST|1 z82d~Xv1$@IL`bn~Af2HC>6kpbA}}~OI4mq)>xZj_Z(&3{c&O(^lfYC~g2t(W1Ypcg z00#_!0;Rv0L@@6ejfiDPtxylA9D4dEa-g>#vyrlWbJ*{zLaa@Vq%|_f zU*7~iOBksL+SU9yeKx#cBv{MXXKwPzb)w}kx0JlSM2PAFHtNUG_pl&thXP-O+7HL- zO3yWfWhNfi>C2B>5JLaZ)HJAFc+A4WAJLIa36ZspTv0kl_QU4IrY?gWTd?s!NfL?n zpB{`beu4$7PNX9UHD8F=I^}O1SW!{&;>8Pe8s-Z!gzjja@~txjqvl>Ho|aeN3L^CY zCSt89i;>&i03JvN<`AYfCo#(_P~J!BsfHbaK==m)tYzImnGi#^v`d6qgR1adGRi%{ z5z~(OwflClJy@HJO%5vEK!=OC&h3kJ`Y^-^Woh?;R716;p|0+4Fh_2^a-Mt*YFHe$ ztS_f(i-$7lb|=^LQh)q~k=FD>KDkt(zX+X_$fxwD-#-(W)sHODukZ~-4v3YA9aZgK zNn3B?&mp}E#~K++J4%+6Ew75x1JY@YAzF|*HuKq)4Oq^5hn` zjA3EG`Y5H*ADGL(IsfI8*1)Msw>>KNNbc8g`~62iUjhR$#HK>h9@n77G%{Ju?ndG; zhQbS)goaf&{hDxn2nq_Cg8v?S)Wmpn@!YxZ#~u}8@};P02fI-P-*kY^9XHrAK2y;3 z0k$@b)XQQr)Ov=hft)Dz#d0K07_{Wtk**8+^d4?ypbNVSBiK zkP$q>_Nco`Ln*mn-JV5~CdNwhU%Y&ozqBHDdsz93O^L$>(y)@Zs*FP;lwO3}_w2d9 zZ=c8soM!By`19(h|CH`CThg$4n8wRl@zHA~(WHy6b~0WYbJ~IqIs`i_=aZe9$9mGR ztI<@Q7e{T0NIJ7G?liDLA=y zM?*m8X5GcHa@}}=PcZ!Yby2~fb?Sk6bG0HGmpMrXI#OVp&%EWQmEQ4`t);M|P{ng( z*H)a5%1SThHiOl25)w8*mq(JnVb?Y%b%e^DMpF|F0(yVuPix7l0^cWOwA^?i5)p;i zzUa#}2bO@uzWh`GVTtJVz^T>A8SPHr&NMpz<6>y^BDGFOxZbM4pMWk&o7R;+UuYNJ z@)U5+5wqOY{gu??PtXh+AOfftt+ zx_zXof)&XatTJlu!5u+AS(mOYXdkG+f@Q()b#I4y>4|(9C-G_2)YQcE@5|aTE-EqZ zBUBc6Mk5rDG;?lK+~a~=H!F7DPhvQjU@;**YKwA5K(v-XjrjGSM9z;;5|Ws;eLOe< zs6eqfBhB9oy6O&pONf95R#OqLy}jqKLqpPPSI(Tmy|{}aZ@78$W{`o>fW=X;wW|o& zeFO(e3_8s`)6OKW^eCF=xsg0lDI-h zaUMD56@fNl2B&;a`q9QGe~kF5?%MSV(?_8jlh7K^c66FT?&ZyKD9973gF+aZZCzT| zLlj{E1R5Y;MS2SaCP?Xq2rb_UhiGz;1p)e;e1MY#iXm<^4;RaTcFySBCz~c&+AU=l z&j51O3EUTsI#SL9$K6C%t)(eqdIxaf@|bTg9+_lPxQmkXhpPlJUc_|~#AEmPrk`V^ z;kT2ZC>fFdM%(+#cgSam5AkJE5j4u&3;gLl7^#Z(%=Yp>A0;KZ&&?}>>-0Cn$o&Yg z4v{!+iwQj<^O$;1(cJ@fJw;pE{b%~LA7R#Zr95|Jl+2YglHRz^afq7aOgiBtO{<&@ z1|W7}^P#+&$U{<9w>50)+qwO$`%giO0 zHK!%5YUR~v&gR`pYgSu$UC7TSzq8Y#gwzRbO`MsbR#g8BliP2U*xO1LND+6`#E&1g 
zi{F&vx%|-5&pdnKWNOY6yx5=JbrX&;O@KV&~`Chks)p$mo{qM(s6iwMv_X&lDV~4(Iw!&KzMij|%5K^-W^CgfM6{4I`&Rk(eEfm-R{Q+-BWhMpn2nRwULuTXic{FOcQ<+|A<4&Xy6)Is(lVNVP^ZUDH*lUlJKp@tJLc;}=c_&T z+AK(_p4VHU-kLT~(Ws>Umo@Kt_^t59Cz~?q-;DZxhQQ*;)@;Py0{9G}ZgtG`F*svD zsp79Iw(ssQa71xi^n2n&qPcc0ZSjUPBQ6no2hf_QT|m`Nd0D%w)P$6kdo#6z?$FfK z^s1)KVEg_%yBBM0tV-d?bgk;$u8dsI<6!l7N z?!sjin8TCs=B}DYwD|T?JmtN}ZBZkAeYW<~40djRK7w@$z|vsrYOKjI;gp5!2zzX6 zYpZcJa=`icyEU6)&6;w0pfJN~hzOt45Csu?O}gQaEK|kv0d}Do2tj zsaI1v-!KOqO|+>Ceq_(RsPM0+A#69Lu&5{$v%if`u}>!OIiY?eAu&TU6ku4Uc9rV4 zvVggXa&I*|eqZ~7#o?3<2{zg9wj7PLNjY-ls7D`csjt6#c+2AAjp;eD+fDf%Zn9_G zm?fu@d|!X>%F{b5;+MayRyldHdn;9`?^OPq*7k{a%ovD#*GI2N5bxC*sNdA=@U!dj ztILl`D@*5_8B}}7j=T(HhqOOl+|{>tyX?U7daL z8Zo!^SKe4BV|{bni6oI=)31M}*1<-=PKK_P;3*S!gQRA)X!w?wJ;V8U-Tn8h?vVVa zzNbfA>=g6djZ(DY+y`3ypgibFAC}EHzoT~WW)*zV9S$B)8X9=c9KI>IeofzVkrP#; z%+Yxo_}%%;9=3~JbK1%36pt((8mV+&*I({4t|{(I(o@69U~ z<|_Pkm8q0hn5kG*d;58|^~O~%lZnk-F-uNM8ZrKK6}7D{v$9NZn_-M*ZaBTPR(A6` zM+agJjZRxg)+t3=U8$~&I=7uNZ4LEv>=+4#xri8TWg$S=o}5t|r$PdG1f_|?P}Sd( z2GlT1pMB+pu=ovWa=C1*{h|a*uhhY1GWR$wE_Ql%J3;)!XR+h)^Q_kKfKYjeG<;cW8TI|F)aDYy9I|Wa>aDbtp6k-h zK~I@y7vtBjD@bdY_Ho(P!wdntwG4%_Ux?dY_R;Bxs;#ua5z2UrcJ1lbbD`qPgbiLs zh6&xH014cv@)?&0+R{9UQ$hyl1osX02QNu*kB@TK+v-g)1L%2yspF=BI(~CsK144w}+BcwJB%RS}l)zv}e>hKP+SG zKqmcyYO5p7x_GpnJTtN{lP=R*&i0P6+mGb+4bDFc5r3Y7PO7_qy^N~IMGJM z$tMF5hx28#wyO$IA%iAbKq9qx6uOpA384tH(|US?c_iL?4m%$;fYEckh2wUQR#T zbcJX_gF*6*O-C;%b*!dpA~>mQXcQL|yz%@dbir!+*bo>t0@~CJd)sCVS{vcimmd1- zSF)_UFO%1jhUf=Y)S>IGEt<=m$lc7r=jUcEWGuBrMPH5fMe295>&9C%>Ep_@ow4bU z?eobAtG<#9ByM-ZWBnn%k_+G#dI|+%SJz25QBf(WeprKv9eg|h??pvO{@oTmS^vJ6 z{NcSwxb?fkZG1`oo|*^w&yU(_?x}59DCnGg0ofpbbphxN zefB-7Gwey#ZS2>tUq52@jr$Y|Is#D7&#y@%(@~3FOGf>Q+2eBZq@Gdst z;mH?e!I|vtVSEO3JR0BM{}3=3;I2J_(@#D7GUoBQAb~h4QnZ#dKm&kk9s)8Wk_TQu zr$XRfxD;?xj`N_E+Yi6boD>VsD`{J*IW6lI>@^T*%inl!hzMk8r_FGHlvc(fa zLszilV)w=Bpx?@jz=E{D5t#hq8J=k&0l)R>L(|v_xy@aC5#S&6_UOZje_k_ztbiUX8^-eJ`7L@65O*u z*g!m7L&Xaa${Z?9@a|aVzNq!@CBBSVey$!OB8xX?NF=ZLSEbn-O|oRs)Ji_&pXkJ} 
zd_y>c`1&?_)G99!f~K4?G)hkZlZ@!a2<&>l-5XkD@AV5WG0rOH)Qg zAUJ&u;{MY$f1oc{&~vQ#l-=yQloJM4R#qU@mA+^2P2~fbUqi6V zs0M~kK~ss7yR?~ug9EfZiw%tX+G8RF2nKLEa|%HjnTxKze&D!&F1nlqUAi{0e^LDS zgm6%-RQ-c9S?vT*u(krkRv|;g0F7 zlmUpJ{L!fO*s0CxPa&MW55gPWeD{=o0AvK&mYG?OIslyg< z`Gef~DN{%!m_`A+H8jE?LW2r@t3%h%L#n!vQDFRFPDgzv3)K0*>K&x1EbRXJ!uGe#gU^&Va{cz76_5<-88E>h`J&96E*D!VE7e!KPTlY9ds z&I$R(m1TEK9WbH&Ih>!<9l8&I=f+mJeqGkAV38o_{sVL*G=P+|W5pUKwV;(^5ESRz zheu!E+y-7z_Dq|v65|nC&V<*ki#JT#MeBM5OxWrBoy(4(7}d)xZBXrP@R+!e)DpyN~H2i?2ukocfJiU9*&|1VIoI1U4L<^7<6N zrEOJjU}IyHpqCXOArej4wA1(`G-;GE=bynltO@l-V$jTqy_{m z$~ZnEJ=5&ivAsCp7`S{jvy_m9f|b4<9oK!fEAJPUHUaXul>+q36tp{TpV$OloB&?lj8HWrPM^l>iroU>F1tAhVNn#}gEbgaOaY?jfF z-n%qoQ~^_~Aop8=z8gZ$QQm<*?7AYxD;em=owV`VenP6maa+B5o6_OAE4S_wwD)~n zou|4(Ng8SgW@MzMeSm8M8wFt&_n9W;xmpn732Tv4Km)-kG9E(24I_|LbpB&2>zIt<&aG(~2N_#&>QE{plKn|j=sTs(<5n;U(Vy}}F|IT>wukIFT?9dvo#r*<1 zFlll_-3r?SIyzU4Lvc*z{`?D+M!qAzqGeG;Nht@W8_b#Zb=vSxA%g|iGdQ@;F#^QK zfu7$`HB>>)V{-t24espVy0_Ru?E~T(p#C2qPT;Qud43lo`1)L(E%wDa!DSiQ?J7RY zHy_Ibz@$!5$-#LoDJt5lr4^*yrXm2bivRKk*zwZfm8s9o>)b%ARWQ zC^2uLj)3IH6haC}CYjI=G0zfl_CfnR45j*FAcSV;=C)~ab{`v`s>t|>+&O|8G|0p` zmM30;$l=Gu8}^1P7)G?@K+rzWS&h%A4OPZmK_%BGzkiGMs+7#^B0k98y?6mWN&^~z z?!lbGA!D@Sz9B~Wgh7f1P;C>&Q$W{oo5*Y=fXZlN%P9A3VJ@{s0)bAaYRSw*BttD+ zDshJQz|50*iUd11WC3V)Fc2IZhWgzU6uo1ElWIv5HCF}QA74^3|D{VC{g}x+#fq43 z0wL9#m<%#HoDnCMZaQ*~Qe%ragKx@3$vjvnA@4eT3T1?_W@wv-yTXA-$MVtHy1_95 zx?FQ0Sa=;q1|7CmkYB;1)-BHr6VMO`4xrZo2X|n#1uTSrQzBYo%kfT#Wl%h1jC2X* zkuTmvx;Ped6G#H&Jo{{dfti$!p1mFx7Iy909n&ZIyS|)Ikb!O|hAUYGL%?wbXxHCD zZC(vTdV6xHs|Ic@Ss)Qxi-MyN{b2ovk{H0$T3^nTGL}P9POp7EpCa6Ox()0HzzcUu zZOFP%R`wtf5`3xlNiH=RT+ol+D8c6FLEn^7Ur;P7C8ay%-$_8I$j!|q2FW3`yzx{3 z*ii`_4e=;n#kcPCpmyjlQ%Npns5Ubgca@KsnZ+uf+yCIuQ{Wwhk&VdOwI}I02L)fw znqcU^TIWqbv3cZ=_t1A3db&i;?w$?u0-kgw0Qo9)H@E?Sm(h%;lk1G(0p>!qr->-_ z8HZzVGapu7pZEME{<*e64=y3e1kJE$3eXpX!Q;Zz$iwbin~_=Jpg%(UthkJfj43yx z_~4iC(a#%V2n-y*S7wQLpO`)L4*e^1#M+9AePBS8#0x}5pp?0D`}T0-ADyLBD$*}% zVgYf{ch;u$5(4_w)PR5hWFW*@Kzyxxc+^vAnb}kdmF2!c3MSWJoz^oSiG?3J9tfa< 
zIcSZFol--c+D}%OgvkT#``GKw&4h~DNqJ{A3CCPz)MY{U+hy^@Vl}V977gw*ylT{4&WQ~9fBGA z{sznt3qohp)P51L9PTt{ph-xIuLlN3Q)lGQ&sdm0Z5aDaS66pXd*+lDp^@k%C+}c`%-h7p@A&?R}Cau zFn3J5M#lkvi~(%bWPB|Um)i16UTj>=2%3;Fr0v-FyLvU7qw zoJ!hzS7v8H`||p3PTkrauG#4&ZU=VK2Tz@`i_PRa$r_g8i_qnq9NTSM&|kYV$oRK= zzJOw)a=Bk2`wRcoy@A zmCJQSV)dHtW(&MKW7%+5ZJLwu)q9cJ{^wE;hBz+&$rx=* zDTUnCT0`1~VrP@4T}aOEKgRqg%HAFPsOPqE6ZN*{aqW{py-M>^uv? zz{~4iwmpAEqB=YkIZBHGz9`H2UiV1W~M0+**V5l@#m~|RyL}n za&xS8n$!NP)Dg|v$BrE%T26kuwH>*IzvP^XZDfkSf4KWYou(x#Y5YzBkU#C}S?uBi&^}hM*KJT3+gJZ~_yE1q=W(U?r7M^8xHs2ZRbP%>YIotT zVWYv1?VYQIh4mAHeWi93TD`j4D=8PJS$}}Ovv=?(U9q7u5djIq5rv7*zAngQpZs{p zyA;OQr5x&?3evH?1=m6jhi$cE%~4dT*lXk2;rx@T;0OH~{;T4YG{>b*o`q}P+ zsA2K4ln~)9l)=HE|EB*!0~xbX!Mx~aCudWndO(fhJpPsmiQS^O#^0zsq1tLydH3MG zmYITXN5-22p!)LV=yS48hr4GS$?b`!k?9-bs^V0V0KZu=>%#^0q921NOVE%46A0iv=W1 zKCBqqJ^znhO6CLeY)PZk6K@`?AS^TsS4V32&$!Z27^ZD=ad6>>5E=5K__XKdP~{0^ zlV;rTSCHP>J@~q7^u@Ug%-3fI{PnhbIIGb~K5yRLRf#Y*mCELLNHr&#U3k%sgV^e|~d%_vkK-W$2}Qm78ndNjLA} zQ0#sg2+e7zFjM8qQ{Mk>F1`@b!B?*;)QC z?=Ih(mg{hh7b>g0+huMRmyI3`I7p~3=Y&;^jSgu{qXj;)DNNP4pw+&br-=3sd2xNZ zTb%3o4d(zf-~7t>chY^vs{6j2A>`fp)pPz>wD<~Ire}Q;R5nZ0d=gZ)bR?@y+vS+R zK4_gWH@Dg=B*lDq2=FPcEvWqdT~9f0FCl@3*zf!C%#qiOs+Im$(<#d=p1(L}zf_mF zNuk1oe7{60F+g(Pe(qK++spm6Z2oJU)TcAW_}ccrT-t@5Q|Ql@9DV@!KBTSJn|s4R%RVL zluSoz`DZ{@v|ABrQJ$l4zGQ1zb1B z*1o7&ZWnVu&*xcj&GlOQ7m{i8eu#JnXq8v#_jgW0O~`XkfmAw-Pr*PAH+85n>qw%0ZFlcPGryL;{u1bA5y0V#Dn;R(byA|ea|6{C zCTn{HGsqw-TxV*V01`f;ag|f+PF7DnoCFl$xSb(8@;xcv%n&L9j~R0TX^Oi0@(<3M z2Rv2)E~X;}&(IBIs9~8rAF1SuO;SrAYV8BAAug%!oz(oQHgD}$Z*Wh!5jYA^qET*&X?fOt#mv@Tww*<6@JOgiaMDY1f0Hr|R6D9l<` zO}5;ww1#;r7AzJ%I(y(jzXQGM3|g6nQXkLf={mXv-U44X2MQq3pkUqY1G&|S zv@MJ+tTxcc99&TofZ`69w!FF`C}to54Raj)-{8l$VAEsl2%#S(95#Zyq~QGl zER7(S?Q92T&AVE-hS9U0x||TMsbMagvUmqB#qEDJ2<35lz`y^wUD9X zp+!i99U8F@nPr6U1m2f2&Ta)9y46kH-_b-VC12=QNQ4Gg=f-ON3YFCP$$Utj$|M-g z5J>6z{)ue{2t3$b>xk)I1lx=Q1GX8=XB9{&*g+Dm4@dn$`!`dJ zLp^W-9$r3!_2h93H(@HdjbN+;PG&kpcIe=D2Q@Nc7DRX!!;k~=fq|#W0>tbp)G*gF 
zOG$LsB-^H4ZWV^iIx^nK?kMps@Qm#KRuKq$_CI_!^A-!{Kg?U0X%hxv4;mmdps}tb zggsyNn3o6<2hES}0~OSS`{9IychaVb|NEZ>|ErJW|CZ!`OY&v6(f{p||Lv0h?UE_j z^#1SQ{O{oW@8JA@c5oQQWW5tvIm)aUmT<4Eq@1!kwFAcEQSlXx1O9%aCED=~*shD9&<0=!7Mz)zIZOQnB|{&yKXdEoW)4XSB&f*7bRWFSpNUmd(W^a z(`;)PwbfQsxSbOiacDnU`GHuVcWL7cNY`uG< zf*xzVAOreOtdG{AFXRkbx&Fb!{STD%=M)|afyl!&(=9m-Oh?P260Xs#^-fAihP(JDvP~zttbY=E}V>})l-?{i{jXg z5l>1p4M{1+2&$__k<-#hpgNJdp0DcaeA|N@15artMSNZ z$tCJP-faEyc(xg;WtVloiT!8h9NLK4Wii?IQ;UC=MD`XQUP=bY2tN+E zgQHXART7_uam0u5=WMu~FV@wMqS( zKxL_Xs`jjlDBFwuAIf>pXO~jk)xPi0A1NKkV(D5f_m8AUHTu@F&HB7<^Nur4bJCup zYO-{+y&&mhHLaqKA0a)JpCyu3K1OL;X>;Yq#c`$Hjz|lRY?cGam-rVoHfb>x54Yv+ zby8ofZ8cA2y9><0e`RF;8%9RDd|52@{gW5ptDW8F(2JyJD;;mHbn{J6J2cU`?c(fI z8A6`{D5Q%=sB#`TobsvYv(;|2t-HT`sqMYN(wg zYt8|M+|PV|Rg-I1eVH?gyy&jLcWSUbE)1CUj8bgtX8SThpR)O8{@o(c*YzrdA4lzg z!qAJe3v+e1>H5c%4*j+?RXllD*>B-*suQV14du;vf9d~ps)6TVG|ke3Q}?p1 zr_oLHeTo5Nt$BX>`6X*yJ^DkHW{XJ2rVFzpGXcnj+lZtN4m(RE4vKxZZtwWmQR%zW z<-~{dg5Z$-Z}2N{X6t?eNmKqXW#j2nhL#KC^5-nB;Y0^o8U0@vXb8D~YXt=z0^TE^ z+?V{hPnkPDbwhQI>cdRyzy3<`*D4=f}*>fBhBpGF&>JZs6T zzkeVfdZVPdh`Gs@();{JbJ4f5XiYauCMv;pXc+)o3hRkww?abMKbU!xj_Obvb3Jih z%BPuu;wjSE=3Brcu!h<67FaopXc#2(Ixp%r2ni>uF`{V)N-0YWO~MpmYo||gL@DLg zU6vdYtJf&9M16mQz3Lhrs||)4tGZwh4TN+a*Uy>ksbQpA?iGu0Lp#a}*oEgTacp2* zkxs62-$B1@|A4HqiywQNrfc?awiccEzp+OBTq5)i`;PTw11CD9WV(EuKGtgV@o4QVWT+EJ;<*c`$9+Wo4+bfeGT?xn4gd*pauf-I_s zqh9?lB!><;9*%{jjO2%H-0m3cS{Pto_f$rUlC}ln@RsUtzRkR( z(8x-K?q8>aQ={kOI9KB_l#SV5mU zs>xWP5pgivA^P_F_amhgxqpSKG z#l2m>|3B0)QELTl{O7BQbjrH~J$|*Hci`=%7nlBVe7B$O3$cvve|>p=vCj2hr8glI z)9f~-{){pDt}cyZ&+3~*?cy8-4V=7r57@Uhmq%0cCbCUoLH^~EE7(xLNfMH$6OsR) z%dUkzy=<1!Snz)@Ke7@u$b@XX!WV3w-wmkba6%spTqtR%aO~W<6BQ0 z2qzOlU+Cf~6K81=q`6W7-LDX$)#}x-qH^w)sbS2obTdZZxcINs-yZc6%rua)h_(%; zr-){t-Y;MocH!QV?Bijy2T&{kTQlDh$1dTwp~jZU{{r3ct)QS4+V9rKP23wILpJ}Z zph4#KiUU4jW{}t53dkEX0TfIbe|&OMf>y8nDZZJOyMo<+Lr@Xr9n7aWITBJ*z}@a$ zwYw+o1m>N@cd{LGp??Xk)^~g5OQ*G9tH8<}@yC(pz-Lk25U~X~oz8rRK_M{LkC5U4 
z#-j#1s&zHzm2B3utzFA`yXoY+9pX-;1sl@wuAf{RlB+b%@n&rK=kpDu24MfuFrY-r z)%!kX&M=IJ0Y=L_7imt9GCU!8gT_FG4mOwA-DiA`M%QCoxKn_YuUWminM?nXhV~97kn_3(Bclp||J=Cv_SSfv4*^?+2PsbcmJE0vn7%Ri!PckATTzFsp03 zgorj^Z6X+-daws0oBIDs#i=^>6`4W&<8bhDqU8Xj9P_9c`wzF0-9A}y3P-C(XAb5o zOYGUG!1UIoHefWCRpqpU-iD$Y`hRB#i8X=G040~>Ha6YD%i`tsf+GQU24XENAzj)D zoITov&%g^QsHL|Ie$sB{_%Bt$3H`wg?uG-#`U+Ko&C=4HI32t9?sXpDdKfh$_OEXV z0sP~dFsPR|wpKl=sB{(?r|9Dw*_i1m@rW>_1eb{=Fnn z2m7zWTK`^>e=o^DmtXt$Df#y)`S&RSh5f(1>A$_{eiy&mqA8L)iYml|c@QwyTlxGqdK@Cmf@B{JZ=e_MN#tcwm>PsArLDs5z_MTJu=PK$yDJ6cLX@Ump%@qK&tT*GfL z2~XFg6;YoZ1A~sdRgn*#OnCl$kfg)~s&)fh12z=0)QogJvL2!5&=sx4R`j#6N91YS0YyT|JJ~IHdFjt zQ|nvd8t)_mJ>UWVXN+nX zNO^3rHSMw$`#BXOc^(bRjAk_%n4rZ-5Bemiug#D+witRdV^)$iPZ) zL%F_MXxt$Z(TLCE@vRVgp(m87m&BUs6ZKDZ0)Q5nqz=WtlrW+rp9=-g@d20Q%_eQcJV)- zs^^aMW%LF2=CsFn4|)gQ!E*5j==@sa-vk7HXJb|$j6ixcr>Vca`p3+;uVs65RFmtN zQ`n9gs%p?!O;(d9>Js-&;tqmIrMu40?2-K`uU<9=wK&7=6E1&@gkANUvBsB~<5qDS zBe2?4U13q1wdY-ms$qt7Z=H})y6R?OMhZi2X}5R!Fa7q$;y3$-^B>PP(S=&&yeIsN z1-0cE7N;GPZ^zq$-~Xp}z-6Z}hq8zd?nfao_|?wWIN!c~k#l{(o{DjkPdnmly6&Ew zcr9*Mr^co2svXU`rMBr_+r<1FOvR=Ib$W09xo z`%c7_`Lq620Bi}v053!=J&u6AZLdkjj;r&to4xG7?KOnef~KD)Hjevjp}?76IcRfr z2dlBB%0G09N+X$N#yQ!a?s)B1x7BL!s^8PKsAhC*^a6M8(kqwdMz~$KZy5nEL@Gz?nXQy-;K|x2)bkfEVWj-j&7j&sH$zLIOw+-X zPmlDz$-0jZxKs%p)`Y%7)K?oLk7V<>0A|_W`U-aSYa}0f7Bq>rCcIDDJucbPwWFoy zL3?#-?8z}zZSgQxL=tnqSOhv8JO;fw7B{X%iML@`w)lb&We9B|cc$di-UXYivO zSb!#L!iNcpu=Imz9~0fiKoujyhwj-JMjOb2py3C%5A} z#|-q;6^}gsWjuV=7MCAPMI}(u+|D*bk)=|#9WwB8i;QI44p!^m+pA$d@ufSE@~NMgvlptUlcId=t7}cJBNmjxp?c6O4OeT6g8+8C-DiWRyY+AHEN_@zC6adB_6xT9+68d?=yD2~`sSb6lv&YVb|m zNHJ7Is5T2^oqCw*oT|tC;Np8QI+zoGd@MaTZ&&qm9I+eGo(HR+YfbR;owPuOG&Z?aPK~juQ18-~WxfK(h4b{Ibyf}8u-GMQyyDz8jVa@p| z=TZ7c)xa06`du%6Iq=6W##t#(1r_~1+joU& zr`_4^XWPZMK}OCeR(uD0E@!R)zRnn|Tg|ZWC=C@-V>gvkX#KoyzW?(=-f&o;+W5!m zA{LepKA-HGy^`_aKBh`w*wVSXrabLag`F=B6weUf$}UDJ%WI_9x8B62lQCa_T7`!F zKMy|@7+-ON0@F>tqV&PAIZ>=RahEIuxGz+!-rhf+o3|h<-+1#J{f<2MkY276@TVX;VO`1p{_!3tmgTktY<98RCe~o-oo22DQYtQY 
zDn8GAJ9QfQ+|M@&s}2H|`)0yy4|b^(%-FG_>e=|K)ARmbb9)#-=m3Jud2F$m)5T48 z^@Q1tYfFAm(~ciMXvI_%As)d3pl+C9nz?5}FjoxzogNdDp0NVo_Y{Q3_@5tNpW|`# z|Nf1D)X&YG`~&5aKcAlb?&T^s0tz!n#f%@D@DZoD1}iXdXev`W?O4cqCH+b1BvBhuhccYinA zl66_*iyo1RqpaB?K5nqB`DU!%`7zePY#bIs%!z6qg3m4n2$so+n1;48LNw5sm0uH) zw~XC!6>MzndEM!DO*s!WCJ()Uz-T!ZG~rq>d2&6_jQwqBk2ipYt+^(+;#O^6`WMw}xo%W>9;AX7cOTudq=@ z`EK?@OiZs~@@p?$A_~(x3!%}vWj`W6(3hp^O`Z|~feFZy3w+K1r&A}QMNK}ZYU=da z3+4oA8s0zT2$C@FGTKWp(z${?%ZphHAqFLn?}vTa;?Q2wE97}*c%VRPTq|lU& zK`l@@E|596&-Z34BNmT=Xu7ujSE-dNzubRZsc*B$$=fe5up#m18KI#GK;XME=z<8Z z?!;JMgQCP|ED?z(HHyXiOC$n2lzw-q))GO%k-YjkqZAKD>2AzGb=CZK0OeL1sPZWJ zJz3tpx$T7S1gt^il0pSoPvZzk>U?qX!Tlbq`E&}{>Z?KjI;ise?=!l>iW~u{tp^Vt zytgORrQ+AMAVx!VdZ<)NQcB7OMPo1m#ZiUDNR?5eW>$G+4!?MB%u^| zOU}ZSc6ch0;R1JMM~L2Shyl!jQsR$O7OaI^6txV8ONb*GkGtA~qEw9({mk|2*Rj!r zjh{QDE`~<~sV$*RdORV<@itB~HNtq`IClyOVj&SZr1IMV=xP{h{$;4r^Q0!_U>aA7 zZ3b@ara#{VMjNIWCO`7wIQa&g{`-!V7?V?5T8+ZyQiq(aGyTsX1>nP`a9P5qiTLDD zH>nWfvq7UdENzeg^^)$#6ES!!$aLM3HrUCaZt?N)fj!cgxylyjQ^dk0c;e}TwWqP- zG^{FZ6$1wm#1fYIh_j(N>iKNC$W5$#v>}M|cVPymazz{k^Kv@OS2>l2?JMa8{ zx<*H7=-USay3bThSq?oHl=sNL;s~V&uqcIW5*+37{y9|A0 zI{O(1Zu~Io0%v?CcXktD72PUv2VAQV$JOk&djBUz1>s~<8pyaKw$EwErWMTFyY=)M zY{u1LCi?56p+-XnA<5DQ%3|M8Y|iI%g6Mj1UbR7f<3?wQ#U`+>B9M6X^5v<#@l2 zONM&?YRtuf@wM#)wY;qCM&v#7@UHq*#u@vw!*Od${WVuL{PE+ut-3;v+DW)ieTW2p z+B9_!MqXjo(K^xPysQx-!@Ugt1a>*a*rzR=^okhZ<|D_`Tdo4V+agX|BtE&@SUC%6 zdLbws6B@4~LoOdn(9`(SvxTUq;4QXodk)9SgPsH-zzfuz@PNiJO9y8VoH$+-oDTLO^c7;ZG)03$v;UNriJ3*1>E3BVYNr7w zV6wN4trP=egn(9)gtitp{8m7q;;SdR%;0t`5s$Nn#i0X&OjOP+YaipTa8O7HiKa10 zT+Vdbz9n4Y7&=Ury;}f4BtbVC8s_6j@4k>n#*Vf|;q2DH@J=9?$1v_ry8L0>5@OrE z>{pkBWMuV(0#MaZ{fn0haw5LDSMU6y0Mp~Z=8Bi zr!v`c4ZY+TnyJc(F{v{$PIfoakn5Tje{;Qwe!5Za?XE|xHO8h;^%#s4RE6KfbFuWx zyURRTk4k7#`)&t?hbLG4Ub(xJ0^@$&&(Hh&vze{S^E+bwSTiIjk55#`It|UvjcNTT zXOvP4xo=7FJ5Xt-;Dj+6r~mMwpCM2JLqCXUk$kF+Kiv1C^&S8@qDq7oo+9&|(N8lx z_wL#ibe_B&HYFi&(c1lR8CdVs0r6t=LA-IVp5>W_gRs1N=3zMq!NOc>U@Hc5`B$KB z@eR|z4rsO^31Q^sbQncUCNq5oD!4<_G&rmf;0dW4$#uX)Y>H`S*Lfig53B|n$am1C 
zlOVE$`O*V62hnSDUr$gO>a4BL%lgTwME4fN`jX7UxlgP^giLUY5T@K~K28lPFFIZk zfe}*+ekiRhzpp5jY5o>XE3I7lgZFQ1Xk)5%S^$4Qm;VUDrPLSgvUFE2fwu zF1RfNpR|(^T%0;raT@C!(QnWNuIB)Afm|d(@7MN})O6eG*o$6vWiSPWY5nH}<@PgI zj(}u+5ftOa)i+>tcK|&-fqY+UT7>y9q~e<0nZ8{4F#`k`660;>h-w1n-3O{Pt>_Ym zp-k>c+jpf(Fak=JQ?Rs6p z6Ajq95qSIvx`pTb(qmST5Z^AOzM8g|sE&*upv5`DGnQ^ukzVgQ0f=D-BlnI$6jNVc zpM>Vt%@GO<0K{twOQEoER4y&kT63WzM_nF7KXbChQ5ll3dvz2j^+v zkAovCYmX*H5(Mp;SB6JN>PF%%m?SqCv2Lv0cgX9(+3ku3d3hY(szZ480uRJTi{#9c zbZ6VWxxE2n+coX;JkSIx|QVVCkb_1i706H&!aQAiCG1^3LqY{f02a6P?T%xor_R{bgeYB?I|P)^cKIT=8$n zuzvE9QW@lVR%8*wnIN2Gr3iAihOuLqE;Tyr0I>2)uUXTIbL{jdhvMYKhKyl~aRwDp zN!Q~UrB>*QY$skfTkyvLWRjp;_YoTOg8GUO;^%|xHCAt)l%(YArF2jbB4hCz2_jcx|pP<|@^unYr^u`pX>AU zPA?~SdNo2KeNF4P#IEAaGxRFASMuTLIv&p`dx`BCI(b%nWCY!%oi(!;kOYS?oSytr zn{Hu7SNwcwsl)fz1qB48f^!0df_vZblD7IYq%T^&e%`0$|AmA9s zR#-5$J=IbAgeKQSk_K6Il8?A=E&Lq^$*OFLzo0hq%NfnHBo5>dm@sJ7 zyC)|Nd`-oG8~J{{DmBQ#wS;Nf{;>)zq3L>pm`G8nZ+M0Pm0XD-&Qx5XWMbJu^in|j zewlY8*AGPsBFW|P{f>sVSaB&CmdGFL&4=VuH(ViP3Jx)zZ}5<(?xKchhOIN|g2U`( zGH4{`sd)%hGcd#q zH#=|RFFr6XC}cHA@iVZ?O0QSWK=&~yMG zRV$bMad^wF&7wg13P+k#$(o@mH#p(4 zCj%FbM`|+df+xV6qaiEK;|)|1?~<&5h;tahr&APt?czTfrN#rKojv0`zsdLsh*|J$ zeAJ^L5*e$fu>puqkd!}H=NCip9RVjoe)oAl6}X7iUw_4F*qu=mG6AYAKJEd9n^$8N zF<;^Fe=gS)b^#3d8qR``kB;pv5mF9Eq=IaK)y=7y$`XR3Wwf4L!Kt8q%%=2>Zy}-> z#hhG?ER;tA)n@hJ9*&$LLqFd>42`|p1;3i%Ub5GQG9A8kR;I^6qr%n|Ub ze<+{;j{<~r8qRa5qk20%N@!Ub1qPM)jvF$vbCe(VdrZ3U%WnGr5T?8p^l zWK6_fBz3?+U&~jYo(@3)T!;>a#^K5+m1ZkFIV6)*cBFi!ed9se^=auv+%D8Ux@*|Q z4;{2>6r*a}D($a&-sgxraoc9uy7WaLGhf2)(FR|5j7G)wBC)xg(h#I{ zwMah)OKJ6pBeLN{5!55juR`XF)E*@+GNS;Wg2^t#^$J6p7q3DfTMcPsG3Qy}k%30s zu6*AyH{SqFIdH{hB=S#CH#@O*mGnIfDR<@3i$_INZZgrQeHt+WTug)mD&d1CX+ceg z2hCm^||&jD8vOU7L`1#v>!vf6g#BgBwqxVP?oQyr&00z-?W0`=x3 zRLWpg#*r$PS@~iI%B)sfd4|bZeDvvt{_?&)fDD=u%pw3|EY?x1Wp6x7=>QOkamlII z7RhqSX{%wM-Oka8vGU~iX>#6XgL%v0UgX3b0%#PQGD6QFR`3_swB##Jt|j4DhwHe@ zh|w@I%{rCL=L)dR2CwX*cEkA3RchLgrALSthosmB)ndpIB;jBqs~~oz+g<$*0(6J~ z-72Q6wcQnl9Jf0HF@)K6;^MT5$eF*#K}1>s 
z$TPF~iQ6icph50+OuT`&+=EfFh^SM->?z-5;?~Ic7M+2~W0Ty#vXHAp+kfiReKpwz z%9_v3-hIpeJav1*jw}N&1`$dO#Cf>gody?qi!xW;U?L2VqyObTTaX%*P@YN?)QjbZ$N_pm2DH*e-vep9IIMMZ`)YQk1?=9N>_rv;njvd9 zCo%w_E*B9~G+c966#LZ)fEtqqx3WSVSv(S`LeT_NYXB71O7X=!DXl7Xq)JEdfrc=) zNVvGP=hvu8ibbAFE@|qRycj>p$eIRONswIrvofyL=ke+V zi96wBhpfc6Q2oSyFQt6ScjuF=onJO}Iw|ixVEDGU!^CzVoDZbROkpQri4}dFlvzrj zKc#9qD>6Yp$Ex*!iUMqLe`4h59}a;pH!fIb*n!2j$T}5{Lo*apvXledhvka!_^*Lsy{PPWD$z`&>tx`l zGkgAbg4N?p%Yef5?)J%jV3xYUSe4hM^CijVF>GLd*lV7s)u{cBjW~reh)-sp@9Wi7 zpHZ?v=`O8m&>P-HoLz8(w@4l92mHRLKCO{pZJtkwTLfBEE^9?!h60vyC-E&j1+T#q z1Kyu*B06K5UBVR5;d3TG!76KCBMh9WI5a8u>7C~xYlWC*f?j2Xj;?MIFr2}YXS0V1 zuz@I^?lT9=g4vh?z$-(kt&V`Qn}Ba1VH<;D;s7+YN{rL3eEPL8)dV7C0!AWdv2uL1 zu+1z2Pl~{T0pZRevspoV)AY(`rn9oK!0;D#2%RXtH8fRGmWmM!#31t*vehFc8>1n^ zuv~2e`x`ELC@JEN&?0Uoiec+Rl2$-#|9yd{GleQ+k@rL`Q5AOq8Ma}%xd~(2ew>-ITe6MF6l#V8tg$12 zAtE)Ae_VwsiRk#jsR+|gA`JO8l3AZ_k&5CHCz=T*ig6r`j)CdO;3bIWmlOd|AH2s7 zkO1m3D;rKti_S~LM$Y8;^Jeh0Cf}sAtZa$8*Awz4KzdV+QtOF>H}rg`54GBm05Pl% z0Nds5qUEr6NJst9r;;*2cda*JC)Dcc9s;5Ws=(Iq>YIp()ax^XH_o3wpA49d&z{0I zYD17$gui(6=8dptFDsE(kA#CFB#h>ScZsdjjMZxl4Q;hDKy_Ff3{fE{ayq{D_DS!tdBVST<0nOHI08EIy z_5%|V(&X)@-x7jKAdb1|=nJV=q(`7id=0qKV&>LQtPUga4X2`&>a`no3D4l`~r5U{PAJVos!FK zK;JR|V&mEY9`77yY-?TeR=nkmK?sv|&U+xT>916*u3ou~l=@_e4EF0(T}o&Z8wKw0Ij0WA@Y^!j0vxybHH zFjs-CB8ew@GKgy`S;}kIP+@24Q6LD_1*2{uulO;As zM@K;09FnQNw+{5=>mBnfq5(fDaa1w#9o%k+!huGa(|D5Az!ycY!46cklOB^QHDR0V zu1mb={D|F1tid!{@m9R__TJ_AQH6OcOCNVaVcf#2^BxVAm0lE3umb(qg5Xqc1~25- z!zxHdSBRI2+-J|Z3y;pV%RU2af`T(0Gp!>G;gP(L_h83etM>OSyCPEd;JQv_iTe4> z5@QRmbRf!I5$}9X#uy(wSPx-MN?xZVu_ki}uPS$$)N3X9A;pVoXK&gphaazzYCjJX z1$q!%KeL#Gt`NQV(j^q9?i=ml-1}32+P)F@6pY(DDAd7ftPaxuA&e~;kKCbf}Ruk1anr`(STFJ zaYKOZ?U_+v+~8>LT%m}CxTuy-;2Ek+@tUe?R#|ZS#7aG;?hZ7#kljE{8&tY&Rq7&p z_wJ2U_jW^0&F>GSbtrk6D>a?oj)T|i)h#>jv}K}|2+?|kG}7N9FQ!a8NDPaw(^DYp z#Q*r3D$~>%r5guPHU)3(3Kq>>BLMbR7iG1E{zU?YsIvpO^# zy{fjrkBJGE7g@W)YIk|qbFOPZH%cYSh#ZGfrx(49((v^vad!xnx3ZkD9H0HA#6Zw9 
zV_VTT^yNixvkHXUBYBc~)~s_dm)~)r5sK_VJ`2BDptCpTb15?i>83Ax*5>IPd)z)J zzjrosgKVzzM!kt~rzWmvp!nvB;FVkp@oQ_@`&8uQbYtjUO1|dgv?qF_Y{78e8nZk# z1c9Ku-WvC>n`cr6i32d2wgjfE^%rHNcjyiA=3vfgR z%csxw0n+Qq*G_w2n(bguorq5Y=P&veNb4&({(wFo_-Lh9`Oaal`GhHOKm~9wY;DFE zQe>lv5(=fDA6yhscbPk$_D!sR%h(m;W#Lr1hAS_q7U+c#qmVK6%P-OAc#`6+_!pgz zXG8&6M^2@xN4%#%d2|HN4M{O5a=!rI7W6#mx{4qM07;hfIerjkD&1(OLc%XtoZT^> zOSEcm*NA7D{M|cx|SNPxi^Mawy_u~ zSf`TLR8}s@gMr0$7K-vzIAKiKw!f%V<8^6uAkZs2&@je<8SR`CfjTA3~R9w+{&P@|H+w~)&yWI2co}U^+quy*-wceePiHtgC zNB73*qyXY1iu%}lln8+8l-IlmHaU06rnMV3^aFyy`sb)d=9W11x1K8IO#f{aG>B7s z!9@VwoaZFVN0LAts8HgQ2f%WX?Mr|1Kvr8Lg1y?Suep%hGZBN*=w+Uo*!GxTHG#vU zjrAZT)(GUdCv=@Ug2M)LXMYWqKwdcoZi9=X?TQm+g@n^-=u-TC&S*-=b9jQAKEDto zk~%eo6b9H$S$I%SO-pt_A%JEy>cM;h0Dewx(xuheEcnW@nV-gFc%cL=SfaRxap0-Q zB;+=NuJ%%4q46QZ&+S>|#sh7FdpKVGFRcT8f=R@hWW0c_nB=ntEuc2tVbe4{olu?P zHWjb1+MI4#sLCpqEfb!>IcG#~L85XQin=%sAIuz2!N9ARZ(QX2aTZZ;R(tj#7-2B*cPlSbb7wTxYneGCVu_!mp(i0&?IT zk)4{1I|VIc09a}rMd8b{7!eO(Y^N+zvKMX!YeI7>HqnA{d`q(aJF|tW&yxqp-8!+xV398Criz9 zexYmkLCziY7b>&bq}&Nb^nfxyX{s{jE(A*4*=j5MoaeMWEEbDYaZb?ao;8Y+^q}1( z@00=jpO{^$H{|!V_SIzw6Lm`arkn$4?_sH&_pxZk5k$@0jA0rym(znQ?hRYeDq8;K z#pNNDfG8xe$PFtCKc?AmV`S{vqXaMpnyABwIO*{kdSVLJ42Mvfvahs`1qvrAyqS6 z?ePh84>2c(mHI+*Ku|_-8OB)ZZ{X^?0w+e$DE5Uxl(HQlrkAK2jYhZzOwZs@NJ7dA zeRnaws1-ir^_^$4Hle^7tS#10jSJl)S)6gAY_z|nwQn693AN%(eEe!)E+Cn?>ApK= zc3rxKH<((aMrl4}8?|%Slc;rK^EsS1Pjt)#3Fe?ih=T1_#}-J;Q|K3Ruv}|D>)bL`mlFUGaDxRs^%eV%~#%sK3NsWMg;{4QTh6!LMGhPr3Mhe{&r8mKkUi$=Q**YzBb+cK2K z;Ewd0g@+t$pagkOm^~9g|DA#X)H|(+jC^4;KUbbXdnSfc@ms5|2^kW553oqS%7``+ z7Lk~cfZ)s|#o^3>p0u%sheRiD!-ncH%^hWp z!R;kdZ0kQlpPiq1+3+E7#m{Y=b$BHyH5qzzr)3X{7S+7*7wO*K$=F0D8QJOXL_YWF%Pk;RJ_=U^pq-YR3%#}L88My6l9 zsCD;4q{sHJN}R+uJIXY@y~%Kix3{-EhQk~l9gA+ru^xBCW)! 
zdi7E1*tj?pEHZ|p%U{1vz_=VrH_R;iM(hU~3!mT*JO|o5y}ZgwmupfBy6CC3U$YbS z=*>BfamY?m^yHE+9{qIm0;)D|&@sK|Exu4G+Phocs0_8{0u)(D9Z*hAPXEK6z2k~f zlf99a$?vW*;|+toCz6uZN^Cq&iUGSrBvYKz5tb%b$bXL=O+)jKzWZv}2NrAbl{j{= z(|zKqTd&?+r=a_=Swx0(H*}}BeEasTy*)82ycX+$d^XW7F8GEU=JjSQ;=x~tiHUKp z9TOK9x7N53YJ-#{CGpaztaxrSu}0nZP3;TN25@<4MdCE5X}-jvVcQUK8hXzj&pI*A2?zT*r+9& zAk%n2BDo$orWImLAG*&C?#pM-wob$tKP*_;AE^<36tM)Wt**Mdy0monym$oWT*{L- z653xnJ4Jv0{qAeog&U8*K@B7#Vym3VYZ8Tg-P-lY4MECD;&8&!zFB%vbG0v~vwV?=g88CknE8i77q2=tsnQ~t5w`s;{F%BOv;H_D1X ze8|n|dl0g@4_zK9D`S-0zP(??*kEO~Z{Hgf1-fchN-=aQ};6+xqfVMrJr3Ms_`m?@{G@-3up|185?WJoP$5 zGZqpYQ%e`y$JM>fh20$;w?{OWDxAeDp%Ajgeny$XS7c--e-#w$W-=JPmg@r1%@P)7 zJZttPpy^2~W2BUdU#;`raq1K@QtRlbsOxhcp0c-BS5pI2{~hJJ(NT|Fn$gltG;R%j zPLo7eb6sn1Z@Q`7>p*b|YkQ(#s5~`f>UIY+beLe=M%FrDSFfFgg#`mOBk8BLSMJHI zULB7!Mw*p{6?W~}Q73B*)C*;&L58#?I*uDp^CTBuoY<^xf9e!E3h!l!enA^yQBje& z&-bmk4Q%yO1-sDy@AR;A&FW(*=5uU!V6_D@wsgLvAlySL%e>cQAbagbXlN!*F05SA zM1<(Zx&hC3_t5ae6;HYm z+lRh=to{||oR<_#l%cXy!;ox!<@)XYhRj@LG;P!(Z7j%nLOin9kix`<3r{1jkIog? 
zzXY4e`cu~-e!X+4y*d64El0sOZ^kbX?co8bGt16;^e6?9nZqWI)&kCxu5IeR+}f%t zy)vQP(h!qUS!>JFYZxUJ=}noWJ$x7?8a4uMv~_7W^uv_F*M&YRe*S!0?*ic0!+XCt{ z_HjAqTb{FXPqA#8SC$mT`x{PJT3Vv@_KkP*maEQz?_i(fsRyQo%CvWNb@lbhE8g6d-)W3*+xARNp38PnZ?)v( z80DS(mD371?h-vK`u6R$bqMUWU7T-O#lK$Lt2ESIj(z`K0pp>$7ek-#P7w@`q0^c8 z&@lwvm`uSkx^>r+zDi{Bjj-Kl24npeZ>w)zD5>vKno`!fvUpXu8ix2xz9VuaO^>K* zi34R=AmC&hze~=8dW;R6*Qu-3>Bdm4g^>Qn#pxrgr+Tn)cI!3K^dP)^d0LjJPJ(Rr zdV?DqbI_d!cT@i6SS2d-MZL)sv|5G5b<`wQfp-xRsFYEjd5=^4JHJ{ozO!~Zi;6UzTXIcUu-58X9_UxXtv_W^);&`XiS5;~{HttMvt8z$`Lr4w;#iTkiS( z+Dc`M;)P42I#%G*yPp!Z-l7~Ppz-5JL`)+nB^ZWXadbJET5yGRz5CJ)kz_wVzXcy{ z^?t?!Yn+OQ`OJ^;QZh2BM^!Ei-ngdae`Bx1BP)eE4;U#!LqiV_j}s?4PU`I1wd>HK z_edW5KIKd0@9%Lc?PdMu;@u$aH1E-`oaO^-XR;M)>s|#3iO^8UN$x+t5b^_!t7icY87CMF|yM{keKog|;%zbZ}Zk1WW6dmQjBdke3N zn8!l=b#Q7)b8~Z9*}6VgQUsLj4q2{LwydS)hO-)U#9Z4E%{#rrBFdSiy zuuXjU__5%xzj}~R=LoJkp4G+01?b0~-=ddnuk7pg(ojEa`s1_7uL1)5lklne=&tL& z#*B@P-5DsBwsV7`Ne`~n0&ZtEMoOMOeF}thm!j;`cklG);my)vX%Ond_#z$|{Qez@ zvOO+%t4N)$H6A4(AV$MC&oTxA2aypymTPKvBBl%^O35kQ?Qj6%_P) zX00v1f)0|kFUYR~0(bVmq)gY^xf2AEhW@d^iVq(=NaDUb8b!7M?I#ysQxL_-XxLV2Vrp8Zr*h1Pv$}cSe_5xsU$f7lLwJMF z=xH1kAdL#6aDGRpvLC;6XfBWzwCs?mKUz)Z4u%tpajDzhgdaI_1hyI{4z0YRqM~5O zgy(&<2W?a3W_5`ClQov$%yAiv^kwifRo>2t}>&R(KbgD4tFbZSM$>)RWw2e&UjVr@;t z)sS|-e)UT9v5S;mjG?;XjIY^f+eBxAQcCpeXU~pc`;HBkOSN9}h1W_-N2Q|^7-Q8eF(eU5^zPW*2Nq_i1LK}kESm6er8x!jbBgj>K1(R2i_nYq#b@@4JQrytlJ zS=Q2$ec{3dn37btB`4J|Ob1_I_xuE98&C4m9MRaUg*7t%OhhA?COLKF$lVBA`E+EW znWASd{jZCgbmoizwwy#p-Xn6G{_^z1ev;-@1iv4{kE4h0)%dM#EiFpYmvI7%AYJ$BLjUQDQPPGi_6R`7DH&M}MoED$tLqiK38Z{Ec>?SaQWq zZ`uAvG!3-I8s*nwxX#L6Cc$_Kqf}JQYcOd-(F7yDo+lS{Z`L}SEjIa~?_pKy($W)v zKBBAlO=ac5$jMq8ZDGIJ-RCb}z`-WYc!ydenedk1^Vo9do;_thI`q?ru*R#KGujPDRRq-Af>Z}gr9naIeX3Hp@9K| zX(!Q`02>&eXDGX8wS+|6XA!l8gap#4kB03m+(e9-A(z7(;=yReOD);AZ{H@#I`%Qe zHP^{$E>4dIvX9I^DlxkN?|@Jpx$eOC@B8y>UApUa$l@($lrrJ%(-@W8ESDo?Owqbp zvKRcfSNZi%8_!Ojly3-r8WbBg0ere~?fK|)()KCPeDEbjp0MMfc$ECm3C<65pDBx8 ztt=K37iZunK7G1nDAD}nMfEXzVCMQSQWj}pv{Oh36uq90y6$gS#<&+B|I07G95OLU 
zuKzf)Dlac@#`NjQ_%2;t4d=G&VPRo#649fw%E|;2v<+K=f2$(DX6=BLD^JqkV}a*m zcy!ye;NF(vK>26JzSp9nmgW8d)4E{El6CDC;|qV-Uh=R41@>2%mMjEz&;Wdrm38n1 z2IYnWaR2f#@7$)Lsgk3Sy37ue2>Kns`y2|heFm?U9_ z`{V~?#dRCQzKrU;eEs@kV`FN4F8VN0G&HJ<{ljU;qZ*)|nwkv!S+YYeGSYmf)@xBF znS1@0l>l!)w#G;>uCQxVCq6NehSEZcYdWC2vWqvPqE6k3(=Zvv!Bgc2__MaQ zHWUOFE85=tcx$g4(5Rrk!>`cy=mC>bm-I(xXD3o(4%*+)u6dU>!}Z#ACdN^R^iYx&W*GN1a?CmXZ*N{ZwOkvU=9Ul7mZ|YDj#PC4!qKok1 z>u$S-!W5C*4ZIF@E5pp`h+5~(o~_Wfl16IyrKHbTZgr=OO$C!lADv5?i4^(A7iB3t z`_bo!UYuNAjU?I>l$6}v+=d4SqmUPnQDK}rsH*Jnzn^%&8QjDePu+;8M@eM2d-&D# zxbW}~a1ON{RUvT%hD#3*0I&w%d@Sh9nKST9cy^OY* zFbbqYi7OkB^VuT-C45|0uKfC`MXHjn$-sO@_(mIS3c~pF(+5)UPZSgsSY9P#ay11f zkvl#FT7b~lPelG!r$wQckz70uQEm{bJEGo6|sH_|1-j|}x*S(~8*!_l< zn8`1XMj#UK6uGbqC_|=f9eN~u09_+E0j19Z7H|0L+|FU;!%xq^Am-=f+{QqqTkcVN z&^ISvqBdhz4D*S?b-EOJ92W%pedv}YjfA8@9OY}GihF;bWqhLbkt4xc z3aSbyrXwQNK~i_D6}DAc4ZV6&=fibGY%N!sX_%T7C-R>H=VUmPS; zbznC`%jn6sch2s!%q%}o-IF|H)>_}+j$4$5!}IZV`}al^6$A8`sgZ% zPRA=NIdLLog923;sU5*yb8~aMOctm_ zq2i_+uAt?X2R8E|WB-s41KiS85)#%Q9vz!EZ=Nl4hEcM9Yi2P;726Lsi^owpKH_UO zU2^#y@SH9l5E?Ji{3n7JH#fH~+K98(jvYxAYk-=3?d(i`{SF-) zU9haLt}*4zbAtNf4&zo7dJZH2io`)OGc`@AH{CRZB5ukstjo2mBr+)>X=Sml@~d^H$gfMBOZnBR5zbP#EOdo6JNc?+OEJHPyNod{ zmg00?pZu7A{qO$0`|j?Z&*ynQ&-;1a@8^>;6B1A&86#SFH=p%Epqci7yiPP&ERhN; za>gIhYO(iH59bazIn_H^W*^zh%QZDZ@&*!4FUgso`fAVPQUm-_+Pf3w>I%0m@j4dNZjdw(YW>VW0X2s2OBYxNwkz1GD3oBF>n*XY{n?7noOH2e&{vQ0PFM^X(fv zxL*1zFvBxJ4tNtya_&i(XWooZfcg`8D%66~0Qe;mq|ME=%eyDqdm6z%Z5uso2)C}2 z9R~*Ps;a8mJj(S0n}iC+Ma4S$%%=6`p|Ria2%0K$yu6etvvq76pn{{L;})u-cMGP{ zS~b+wWeVNGgG-x;&NNd{_K-qeGnt@YW)X_2k8>>hQ)>e;F=B6=xs(n7cNiqmg<^I@ zYFgTTdujUu8jkzqwlV+!X2h`HDOG%ykLW3}+Y*UIrdvhU=vtMPSeEzHpyWs&KAd=3 zJM8({kTKBp>JKMMWe9xn>ZZoV#UtL(IP?vPt?KOTMA%^Yu>4$BpKZspbFI3J+l%X%Z{K$I8=^0|B}!+4!cE}jhKNN}_bo9^%?wM4Pe@2eN}B2! 
zollF?S*XdVwu<gpk8yW5zz72f?1w4^q;i9Tx6x?K@p!|J z^musZB?Lz2ZI7%=V(Yi3vhJkpC;T(7yl)eBPhl@mU?B%BOD+oSFlh2E(K};b>>umo zltF=G)oDuSl}JG+O$rQaK~BpNr2)jaD56vmZ|}uLt;kmj6Q3Vn+Ls_(BO9sLb^}xK zTz%RMm!nh%Vic-YYfw`J1?$%qNl(RNR!3-1w(r58sI5Qx&2s&7rbDgT@W)GdQ$M-C xht&VE0ZRlX`ThF;&@W5;H^jdh!ea0HXdk$Jz{#)UGPH8>bciA>sB+QToFB7rNB95$ literal 0 HcmV?d00001 diff --git a/media/images/gemm-hierarchy-with-epilogue.png b/media/images/gemm-hierarchy-with-epilogue.png index dde58aa8e5a62fbb1c7acfb015f884f324607936..e68cc0c0de8f45704e1c4c761f3cf0f767a571a2 100644 GIT binary patch delta 226124 zcmZsD2V9fa`v0p`xvdKAZ3U54uPAjOAObRDOO-}IRQARqB3p>eKwerav?^eO?4=^Z z1i~I+Bv>oRjDR6*P(~n-1c*t10QsK;?CtON|9n1@^`3K{_c_n}KIb)oDjh!~U1seI zANu8!ork{1exCC6m1Y&cQ;(nOKJB0W_DYX@iHpB?V71M#qZ1cXlwV}8_R6Vbz^CMU z-W24C|N2_;kwfxkKfEU8Xn7F?iN0@MhG!G8S{;EYbk5R>5n+C<+Y7@nPS?^S6K@dM z@@aF|sEFq|b{h>0eoFer3kHfx^;O;z1FJ0*G14vWO}G*}M=~?~F0b@6hC*KNf*@#^ zO2;Q!w^prl$f`OKr&gC=@pnMB6dP*dcp{O=X!LpetkFx)%|?+nHNM(l`$!6mnK+^7 z5SKsUiMFzKh*TH`7rU!%Tuq|?s*Lbp=~=XoBkY>oOH6h^ zR!rPx5EsX=^y=Ka$<1NZFb1@{Mg04zQ%Vx-D|l;%euf~YvAVW-X{pffxY`oe#Y=}b zf5nF&(Zo>mqLQOxZRarAx{17I&K(Rve$pJuuX!~*N~KB3eFCmn`HbHgE_ijnc9J11 z_Wuf+_?(5uSL~Mqvf3=b5t{Y;fGq?5z*>tGpknpr(F17LCv9>C12kf z;{Hp#RBT?Ir05L<#d7Kx5-?R3$Ij3cj4*nrmV}URpqi>c0;X`SPY#u;GkAHin9=2{ z_!+p`URziZ8N{um?(66@!mZupwljVM8wj2K1$NwMVjw!x(4pL1Jh>=`)c*urA7c?{ z9X*My9>#_3d$ z#gwpy@u}8(s7R_sTv`JulB(~y6I^Xek;T*0lsbrYZJfTu(y`0YxBNYJKyiz#R~{)G z>3ZQ4&zVp=VPw!uHkBHey%!7_=E^Y^XI_S&9T~q zO`QIl>yud~ayIJ4N)_PB7?ZVh#vru=pP7o#85U_o|OQPwE>b+Zr)g2lYZIm4*PeRS@8<-@Ea5(pJQ&>7Nn<{OC#%bYni=)qg zYu&RS;Ji*b2+Tve#G5vKhpI;XRI=n4OU$W`;VUBuDlW_x@Vb-{xBt~Y%6c_Sm&(`) z**`DEyN*ALf>KcDxmh;?7k8C8D2`5{N9Z=>o+%-^Zvnq)!+pi*n#b;qDPasS;P*%G z8g?Y-xjAOrG*#D!F@ZV(*Yl^cO|ACe4|JD~PG-@m(lNIt!rHiR&R*#3BlVL?9TDLn zmPzk(Og2UY9he$@a@!5GF!Kf9Et?rO~4R#r^hiZTe)Vs+nr#`28Ng_q2 z4BbL}18&3JNU_M(;5H1Huf+A?rKJ@=fzJ9}M2COfT{?zcR&t+@jN-Q5=Of0KF8>zu*(RH5s9HY4`L~T_aZY)7lqL{NS&xfTwtcqksICW( zNWE);aV3aNUWatQpqRPk5vprTPqLo3aob?F$T|T{V6KcvK+TaE%86|Kdi{{uuZwf( 
z2;p!k=|it>j_bzi8(=*zc|(tQi_KYX8-1@N-0=}`(vKBMxK^pGUBIv^j=nM~v`KtU zP|y*%;$~}A8#|js!l_(M!3Ty1Rn0f+Qz(sDFVIB!hB$@1dO%bzcD({`cybe2bRxK$ z7^-eb9er7B9!`CqN$dyi5s-X_$Ts?icTwd`;@mf~ZukDiR??S%%6>G6m1-`ia@ZVG zaBF%V$$0Z^jK!QlMjARhAi&dg(38M`Nabx8!VvYHBW$}|!q`=prq5PIbN2rspZQ2C z*EaeZoK76Iz!-;J=i;ddXTeRV`LpQB_sk%znVf;C$cY}Rw3=3JUYqs{qZ5g{!ASF^ z&LfGX4u)Wkmy&p=%tj2qqhS9Yk#mCl4%|bFUSlMu-WBHb^U1e~M)|bvF1PyfaWoU( zz)2Xkiy|l*gnsZ>UmNQ-oX6W9f(Ln_!~V4&aBdrSpLi%p<%<;iZlQHxKxhO-S~ntS zJrzf`Q&aenb?=o#jQ{&Q<&9aEQvh-L*C(jr8VO0M1%>zv4P&d_VM+$KQ>Z!!#Q4oK zLzUD`KMse3#?=hv&hv%?m81J~qpfTx=rK;h>Rj&T99lA=3MZv-Rs%km*lHS3O>qi& z`^O*OfibO(oNTs&wBilzE((RM6o0{>W`bU9M_Nuc-PmLcH{WQ+aKlC))FMjseU>(> zeY18!_AZqe?|CgM8t;+m_d4twx*Z#D?6C1Ycg+SV@~Us2S@WNm_4A~@_yiiLk+NnN zyWV;;8rNFy7S&feOjbVjHwo|~43&u;7qE@`1BtTrYaZ;`M_M^=e>YVOv!@S`g@HV` zv#UMykPn}#A{J32VPUlBDHj{&FuWqr8;O|THyS)i&E^ewmwFL%9eAUC+}udFLIlfq zs2s(=mP;S9Y{VyB(!=K^7|fMXf?ifDd(*x!QNnxQXJN6}Ig{s0;HNQav#W|cT z`pD%`joS&-nlSd56JRkQs=qRayZNNDxTZs$9A$Vg-Z-S^ysZCo#IR?o3-94lxZ&8( zxs_y0##$kUQAj8XP|)bWHLNdp@DnfOQJjO@jyN$mJYFq1y0NV6%@?K$OE31QH!kv? 
zaEc2$PBkoWfZ)Tv_j^%Vjq`|;g8Y0E1A<+m9IL}x76z$ac!Pcr@5_$!bOhu4jCFZ3 zdTFyb7jIZnU%B>uQf@2oKFy)kRdw_+#>=GGFco-&k*zhu!gad|&N}rcTC3V2v~E9C zo5*@KDIF(Ou`7x?GBS%W;>yu;G0QJ48Sew5h7t;mCqD%qwOVd8Lf{fTndIuoWm4(8 z6DkLraBCY3ritX>&K&CXJO1iNU5RW%&Vs4yijc%p&DFd8uxr67mscUBuyXGbf$Xyk zqvhg`i$nFq8{@0lu0{p*o-GS(8@dhb+!P*<+5yee3mB?6Rcm&dra&3L2PZwv7iB^N zq)@}u`B=wqhtIOTTU!kId8!v-^7Jc^U$o1nq`MRm91|}) zz3sBtG=K$xb-}Wyl!O7ZUpqAiX%F$^(6IlkLgpB=c5HRJL76~MI@OkP%al8JRQ+d9 zPfYRJ{qBiYBWS(F&a6kZPI%7SzJ~m z@68v$&zDTM%LT%>lDbyu8HU2oAV0-iJZ1^g2{VhkT^fZ;a+Q|8SEa(wC*=;GQV10p z-#~bdy<+)wW|PS$&wawY?CAS=OSbv@2HFNu%MX}i%ZxEo%k@uIAYd+%6vO@-?5#9xt~CLa{Vn_Z+_^L@Y>=hda8$Mygd5^ z_7F|{RhV_DtKp5+7mOgPt{t7=`3>am!Ytj4T1uP#^nR^fb(A;7ff{CKXN|@?+5Y=; zFs8q4msNj1xz5YZ(&t%~*RD`&Vg6`-qhyQ)tI4WA$8j@SdJRw+^%(Qxip&JY zJL({nBv(m(bKOc9W=A1N75l?yAa_NVhVaf{)_Z2%+MClTQ`^$#grZX=#&u6`gDGyU zB={{Wth_MRIF~SbjOn0ekTDOkX^?z$+9^NJiF`A(N-b*Ru*K`i$8Hw~I+c|;_{zo) zA?#)c@h?zJJhPP5+IikWN5)V;AEbUR8!s6&99)VEaJj+CV!XQFY9!vSf!Ez7Gbs^* z^~hZghv_yADaHP2Y%7YMuv1-uB}kB`-88h*_6fL_KfIo3I(Y@;{Ree(oPxZY!>h{D z{Lcg!>MwAj*S@6XgpW4~QZQ0G+$#m{ZVn?TXYMSc{n~pmJLZMkstR{8^r*SEXdFfR z*y5I`ptH^fIJcrx0le5)_2L|B7uTk|F7ork(4wvk&Qcwu)ZZulkz+*NuXV%Cj)@<$ zZn}}b0}KssW+~`5A4I=@RK3q?gtNw9=Q){nP>-rjnz_|`^X11s6je}2i{SffX8(CvZ=IK)aISy*)U?$YdY%MFISGI}OGZiBxd z=LOSrUHIvg?6O`%sO*I~nzjDh$4cYCM@_Q@$)vqeg?{Bho5^p#+`g&I!JiliHKocZ zOYT!fu8rz5Ab^@YA?%>&lx^cuDT-H4GJ&n)$=taXCs?Z;A(lI!#pD8)M36-Dxl-c9 zu`G-DM6B3Q^EKeoe**g!*SlR1x;GhOmUvB6qmE8RCq*JJ2#Wbhp~%>Ew5hYx1fOUt zz@4~}#kQeJ;>K|o@LvEcit&f9c?VHOsfDrs+6moycmj9#h;QF#l472BSEFXPbf;`* zpyE~y4pdq#yCGab+k+K{&&7{fMO$)-A`HXNkY;B|2(@YO>NKv7yjop&P+cqRd+qUO zX91QF!}KRa{(-HQout$k2mT>1ci_U}o? 
zODfu{)5TfQ!R7L3T}wL9Vt+Wv;~ zSD3DG&Y=ALAVDQk`Q2#-Vt`~phxRwIY12gh{t}ER&dHY_cXJ!n|Ne6*<``ivNh(GT?g5T!i;C~uWk}p_4L(IkAOL7@;Gow| zNQX>z2FY$^`d__B6ilmVmPOKahqro_W9e8DLyBW8aD=DM**!HrL@^ za*^@nja%GY$!@X#5XGeRN`^Nz(Ut}*)F%YR^`chNKnYYvRI^rojwu~7@ZgQWuZAsc z_$Rga&yJE!c#{?c3ovn=usgkiw6*O=PfpNO!|n-xp%LD@{IiJW`C!MDkE>9`CAICV~@8j?u$A;4l` zsH3i+9Hpgn}iozkF8Z{8Uw&-O&G14Q%l*^N=R4>hOX4=nRhMH5#O4mAk z*bj(SI0@=^+}}NC5(r~18y7-)d_DoLWne1QS_Bvkd=7+l!@kIvbe$O_(lS74A59MS zefO3@!}q+|5-0?neTFC{J&C*((svr=Wm{T2=C-k*KLvCkF}FCu|LW)A^wTK1jjXLY zzS{-(4uD0B=HU*f6i5FU19q0*s+wwuxf!x9S`$#p)osIb3=iVIq>d}#NdKeieEIhhAb-lusL)|X zipomJeRK<(18<#rH;+=j&DFduN|;1syP!>N#ou=#)p=QOP~R`WC7Z1osuEDmeO((b z+g4ltDXrSV`p0c-xYR{&G7FvY2=D;;T}aHVC;;(lps2v5Fw3SXuxV?9Pip_QC(m^4 zD8(3`*{{Xx0CzAzI(pYgSTFK+s7yb?hZ5?SqC&fDW8@0cZHC!umugG1w(C4g3$%8_ zZ8joz90Ip=V|n11FF;0O;CP>*koKy1Ini`6A+>)g7&5nLO#AjnY4uZc7!`~^7|US) ziwVWXSL10qGEiB36bbp;#6}Q?JHdFT1{(!`SKUy5&9ev0WlNn*F(j4N2Z+J~|D|d{ zadtK9m}nKKAnl{oK*jLyS9BsI>JS$&VweW9Kf=a?iUuVJ0}`rSE2YNagf$}XU9g%< zmB-a~rwKM0d+x`@tLy+V>c=_0YA#M{F90znFm-utsA5Sc$M3~VYy6u15k3|#3e@?L z;6~%d&y0ar0=T@z0fMBa3)wJzWc7Znstrv|E6h6N8utZbW=9=$mB zQm?g}nBD#vG;qq;A$s)N#6*IUYjkMZR@D4*3lICqHyp#g9T>9uzC7zNUiRczx2Qxv z(2{?@^(0;Swy^X@CkE!GGwv~XFFOGw>F4HU;UIWhUs$IsBg1H{B+axCd)_q(OKQg| z<_joem6ePvty6VNvw4ZGSAr-jggiQ()etKDv{e}%UrpS|kQe2A6bxhLux!hu5}NTP zOu9YfT{s$jmOh}Vt4l!Gdw&HD>@9F9%}U<#-R|!Hm*JTB*(ruPu9U#*PHQTwuO7^t zl7Nbn&G5=MuTF5@iF^>I5UKeNk54TT;nwc=^-%3Ysx+fl8=8c;>{|?@JmO0-}055Co<1XLa;rzF8o{7%)_7npzsA%8D|zMabDE z9^dn=uUVul<)*P4!UF_H}f}&>GE^q z7J$}}tPR~()*3E#6~&|zx7|Xna(`in;!6>!bjWRkb&w9sFt!>W`6sp-FV30|5H4=l zyD$DqWVdH_S5&w%dZo)Qm68^1uJhYYC^pQAo4~jyhjJU6U7obKxKDc87q4qaw$CQM z2jQ*xb0r(5J@BgL@L&*j-8dKG z&#hyORmbm!%Kk&aD~*hQwW82&g{PlXg3CmWwixZD*<-%B4x;QY&dc%LL>``B8YYen zG3uFrIX}_#AT4j!2ld7JgY^5dfq_VGX{bzZ#5vlprL&u{hZ@E0@tjezc5!oYjGo6m zlK_6Tqw0Om37XL^sQF#G?Fc7LlqCSV_4#&HB>EsCw>dVr-HlW^l)7tH1Ty4p#!=9V zA-dr%Jy)_F1$9sa=zr+Kr@Bwkf2IBgwsFL=?;g6qKL&8{g+)U^dgxYRUltH5wOuuA 
zO$@o;iP(6r*|7)DCXfo(*uEM6g50@LbC}M?lE@!LO$2GX1{}>sc>6B!+cASF0e%Po z?tCo?INr{K{my<=3K85PA}PupkVLpyhn>_4`!NLcm?D1x+&&4jrMNaC9B;5@8Jbk* zko5{6pur!|F(g)m*2uy2b(HFkV_=-0|TlPGcSab<^0Tkt4 z?+&QBALSRa4%AxmH4IX`f77h#3LjR!pgLD;n2Xa>_lE8J_{e=(4W_AWx3%e+!Q#!! zIRn=@f(|^wH}ufwAe1kipju@(f2l@qzzHua-n`1=l#83)nQey)i;sOK3F&0!)6^UA zNqCC%R$uAdf8B{#{-Bnh1RJVrizo{>tX{icy>}N>mVn`4x{aTB zBLAA%RW+a5hv%1U8`E}nx4l^NOm)NB4A}Xv3$3GiESdwy4ZvUJ+@Z@is@oi9rL9Tl0xIZfzRH4XfdHk)wKze zQv4-7)JArn;?9B48C+l~$u~XS)Q;>pdU=D+aEV(j&&j0K?3`z30*n}f@Yjc8d9)Jr zXHZOXdg#V>EqtK!B}{vamiBdp6g^ufN#G4a3qmk6d?bfe*tL9*-h=FgHvQSa16i?2Q&_d zGgI**I=_lQ>;)o`$cgJNAG(m&a~S+9NAp=5-6GH&;N!Xn<4+AWgt0$0@p7=p>zQ2# z-_=r$z4q*Z_vD$KB0j^7uXl|{y_;0x*A0X2)DZ7M>*O9jYqSBL{aVy|$h}e>I8F*2 z2Xh#i&>bu$msYZRZhm-J!mG9onT*#>aQpBSNKRs42uMy&|IeeGVn!Wf=E{ag!Pd$vM*h!ZdgC^^Fa7^J-YN@*J%Insw zp}yRWM{z3%C@S{-F3DKjH^sY+9!+}9Evs%!<8ga!5U zuILbW(7$%1XWlJr@Qo(WdbO;YsU__@&p<>@kEZUU9upyvVsrzt*ZrI3yWcqwe}STy z*ox2-H7v@?79aQAtkO_Q%sngW2P!3ofzDQAW80p#+6PPFjSX$=^i9Sa2iv}I!_9et zbi`{Dc#|1!=z2h-+wm;s%)#h!@I%6rJH-{*HhcE$5t0s4(nhUYCYO4^r-tDl-F$6S zbHd;6Gx36Ia<#hk3r9^A^lm@c=Yhe%c9?Y&lUDcr>wl6X91638+818#pV9*OxTU}d~(Qg zbySOm%yKVth>i-GEDN%wfiakka6`JD@wha+k#2 z>8TDC*E<}k<50aVNRl*y=;#2RKE)7;OA3f9ZTL!R^n|6Qyy6YLE~BvVCQfCTYsh!h z24j6C_E^ZD#q^^oAscjKbZ1xCSOvZRu93hLW|L90i`_O{;xJ-q;%-Y`k4+e}ekLVx z1TD(cYdy}xN1=gdgc8!)<5o9PKQVBtyl4&;m9KxN80r}UOHK?m`W-5?aA*h^wK6=n zN0K3bDr$*}LdEenbw{K6Sm?mWj-PP%bJy4Bm2P*!<(w0390H$2{}V4uFlPPvrDY?3 zo;*X$b!#Ky0va}@o$T!o7;zW&1Vk<5Q5re7D4bHkB+52p7o?M(Pt(j}yk+2aRERkl z&f127z^53C9}Qh^zH7e6%dxjI2wga+zm3*_;^uszrNi*zDrbG1hm%COdh9)84O=0XyuD}Kf{iqN)tVe$ehi9`5f6>^tP4^>1(ZaION9ir1D zB67zc3`tXGNetY$khlEZwHS-{-A;kC1(fLE*#mL6+vP0(@A^PIp`yNNF`VE{)9fRJ zi6`m*Km5O+omA@m-PGtNn4t>A|6SZ-5?>cna&zdycJQHUGWEAH7L9-XEi=sgb0i_V zi@E6QCLRL*ly#jPeT{K;L5T1~n_J9BA|OZy{LuXMN9^z8rQ+A-L)oOMrG3HB;=0uoDc+O%;?td9naR_<&6M19e*iI%s!1?zxb8p46@!S)^?$drkF%C(GI2^Z z*O^12{JZ#RK257o8Y|wF0bc5@T|2*0@-t7I4P@^R85N~^wmrS`>5?|B`?iv?fidwl 
z2mH)|a*47gzfjsGhs@B?t&mtC{`GV4tEEF`O^*@Cmrj@bdMhb1`NYoo^rXUMW!W?S z5&~awLe0nsHQGLkXCIb&O>*Z~uug$hs^OU1sD@ZNw@1a&5ecV$SP#?@|A%z4$W}%r z^~5!z&owbdUFL5T1yV@5;5OvWX($oHY}Il59*Gb447B`1yv?zkebhv|(wnPYG3rIe zR?&~)wKX|<)FR`1a5pjjo^!i-ZUd$bBboqy3yt!qShWtP#_Mp9 zjMt%G9zJ|{S55Piosa(gP5xIGZ_cURn!B4*^VK&@wa-uaTRJ&j^fr4!oW7dEGj(YH z_jf!0{Pd@1$e4xQ6{?pS1DF5fG(5|Xd-Ui2!|`R$BB~l)>4v5hLrzcFnCnrDg4wvP znCFjwnyWbX_U>OgS)@rzo^nFMDJp~39-(6@B1|TRcpZtI`GV%Nu^|t+7aWLit2~3W z!jg7wnK|mO1A8AQ!_6rfw*;GDW#dszAxPKLPre7gXw()L2m7N$W@aA$J70TRr334C z5*$nw{brhce+%y3p5idr>8x93JtAv%k9D{*Ejsw*UQ-n6W>r!rF6i9bQ=jYvijs!G z`8$^a=a;6YV?(t4>(?_f|F?J&PaKnb#eZ^5&T7Q!_>n|^^y4OL{YDC#!SG?Qm_A6X znt@WlFuKg&{XY@)yg+{@_|n_>pm@&$=<^NK?9v%=ZWHSVYxPBp$`HP8@mdwZ{*I=Qd&+{T840Gow=oqq(G zaO@i?Xx%X5g6sd!E;Tl+hwCs_qgD!p;9z!MScnpPBoomf5QB(W! zIBRf(`-4R*>(21-8lN3l@AcB9=}U(b$WXAD|8H^q|_IlHNPX#cGruv zUVe3T(HL$`V0O7AN~K_@1`C_QJxp7F-u!Kf_=HcEGJY#K*NQ!2CPgrsfB&iM*Z#yn z*ed_Dz5e7csG97$8(pNAZgYKJ=4bXmotLSbhJ;X|*h^@5N^x?vwmkOs#)B778l`=A zSeG!KJmvlca6ARg{VEcN3EN#r2Cn>nDoik>Q-8W@>hQ7Y^{025pW&xe=emSH^jh8i z%Dvx7@BPxucUMI^_mj$S(n-YB&})IRXt9`M+s9%=AGRolIP*j$8b*>HP8q$r@RP&y zM=9$b*xKu5v)^d!x~O+T%^@SF@jq=Zo=!rQL-)Q&)ZGvKr8~o~9ad+_MSy!!88@pa z0VhBqsB#-d`#3dtox#qiw5l9C;$ZE;E5$Y*Z);KU)w1SEyzRx3EtXTBdNhC4a8_;A z4~|^MJ-q`}+WKOVz9F2-(5%>+>Hw8`Qq`VbyvxP-h0PsqS~&qtKf@7`jNSB zKw$$&Ll@Tm%XA8UIM(Lga|pT-bM-23+)VU4pcyPdg{;#(2*i1*g4{CFgG`V!&hY6R zJG;JwQ(-H6Nu~9)@0)&c#7u@3sr;g<7HvmsB#}t5Pf-x6rG0x@i)uk;GLrXp3&+#Y z-QHjE{%@@l!`P%OUg^kD8NXCZhxjQ;m z-MqqIGsUbVKSecvZzOnd-}N@dlI3E1TgtuL{>t=Ffe6w|bEMuw+p4U3Z^Qejr2i%; zTT4P|X?Qp;(QydtocZMETt{$<^Jv5k?46gbH!qnjKUa}YTIVjVHB+9H8-+TV zR?{;&x{|$=!^T)Dt1moiLl=zdY8kGIg$hCKq~tvbv!gXIy3Zw(re37eH`Z%>tNV&q z$dNXH*?0UQ5iNNT*Og6Vd@#!9=nHgNqi=hAdf^@fnHW8>h`A1R-U1Bc{QW=&EXk$; zXuwuqvpMS@stYkZVC?-?y@o?=*~m!j-UQ8B{Wh)GMEH66Ols1ZBShmiw+C91osqSH zji?u;Q8T@%)@TCCmHrcYJVcO+oyv9-eq-E-`w7AGTP z20^kl^XMx=F-s&Y1tW^W(sy~|!U1f~W=?Sa>9kLn^|oJ(if6Cr_uKk}$I9OW}5 zl@Y>`eC5$zSa(rNJyk`o!eg~Lm0gc3DI^yk9b 
z(-hoONu6nXn%e_6>a-PVeVC_S(K-e-FK7q9T=wfSOCUR=I& zu=0K^f>o>;=7-garL-QebI6!__YUSDESAZwY0*84>oY2}x8WOeQ||F}A#Jhzam&ab zDw5dF4oUz^zjt2igYRO=1D6p_uZpa6DZgCBMZ5p*vA(epojRPy`9QaPa_#I%=&K(i zl6{>R;Bdxc;|nrhrC=3)pDO>DJN}c$aJaWdC0Ciga0Mxo=n-Isb&BBnJ5BEH2+VzV?_Mo`#B|L`VhRbs8}fcKAq@pN+fGc}W4#zxvV3}O-e$5+6{j>{R7 zqaF~uRg_=9DVdJ%<+A8a+l`0QWL%9_%CNzcV;Hk?3)VPT8q zoP)mI${41kZ?PV=zD2WA1&PM?A4DalYH(EsHCsl?kz<(G^NBVioCc(fibL+y{pz2u z!f;o7NiuJyhO|qKdERtKjENdP*xyN%H@|J=9jB%AgB}DR4b{IQrs$)HV&<-)u+j-A zUav|=(g#|auX%;8$+TueOPa4)uU=FqzI>)3sZkT@hD}kqlh>)3GzDKTnxJPQF^(Ri z&#T~-109)K)^Js&VV*GU^!oaw&wkyeBW=f!q}1~J=$xdfRiw_*)x-y-4}V488r&nC zRb@xNV+YPz(w?99(tE4TA01fhnMV*8&IOKR*^P7e*pE8$pB~QE|wc;vMuorLrW@qV zgOso%#M@cE8~)h98wJ$e3-5Exw5(%q2V&vC_`@%= zKC~%E%@mcE2-lDu1~L@kuw7%IX-@>?K2ZD(>sWG`w3emH4L zHF2cA$lrCo`TJ$V8qQA%E|NMm{4l0({rqMo`e=nMc42nx4B}3*VbyADlTm)QxHNcD z-fyH0Vl7NDAA;ptE2SVen-k2i70AWXp%e5jHqfEne;r@C3jr@ zU{?>)*dS^%K?T-oSwL%96HspP#FsruQ}2R0n~d5dlZwrIbz4T-O}cfd_8fjv*6&ks zat(F#Ib=@elU#u>cH#N{P#vNdJZz*$LxfMZdva3ussLMTzO7kN=cS?bf$}_uG#2|( zjWx$+tMTo2x0}p=ICJJqis~cJ$Gujp(!Gx)cMCd39<9dF!$DbL)#u5=Q&t z8eC0aGO*?%*Ac6d*INZ-??3DZD)=rC8xOxURih`Ox_q%8R2?2pO&mbom(m+|@aVEr z(bH;^J{=lFk;kqNE=HDG1oID6dhMh5BYRyYxk*oQ@%vu3+LLmpiuUgRkI`ih-V!#~ zaTh{Xb@l!`PLft+kfjw?nEu3#GOT5N`x8+aEKd#doW)$bAXlv7mAjthpZ{`U7#=M5 zKyy6!OdktuVeYLlUP8jIjW65!D)Hr42WeLwdWlmL4uytZ!{7H`#G(Vo-?%>Cn{r8i z`9ZcS!J(49Xt&nz+l3u5=>ewLi&zu(!Ib@5z(<C;6Ez6AzZ=szf7|z%`hmV)TK8V5T;-Vy8<0k8{74Z7eFz*vVR(Gq#sV=at2O@Q z=}$yPmGiavcj+<0nN^|+;THt z?9@0A98V>}H}*3Op;S|_5?j^a(^+z9dHo$Hk(w5*()RP-{X3e19uNzU=$$t4Jcno> z@H~eMHpLdI>iMihowPv4-b1%6Qq`Ty8EoNlZucduI);uK4@Zu1a$UIeCs*Y<5mB>~ ztxBkcE2D+kOK+Q0vu=<4bg&dLX(Sk^j5J=q*P(M;Ss|*(nXvTbr_Fw6g7en=MVV4j zE)#jlc&a?<@l!b%GS4BX2+wp6+8uF7X}uqCftEV8sbY58x=Je}7~8F~Pwx2PgB_Vk zjs7c*ArDp$C#^3n&SUi2bbX35&&(7eFO_Q7c=q*2B<3o^f0U2rb@`8!l}}5sH#uIa zZN6iRxCY^-6^t6bS!oxwn%NgtKKGDO93)4A;t&HH1izloHhoYX(BwZ_1UUNSG>RcBX)=)J2b&|Kc zyF2o^b^D7ul872GnDmrmUZDjaPj0_AxZ*m`WRAj1uViHv#ktMQ(;OADZucPBd^umG 
zM0-YGTIAw*$B?#O8ZI9DHeSXVWB!;{-k3^9E|)lwHmNk{HM2}y7jw9l{I2wr;Pr3* z{+zY(=s~v7&ddxoyFVwUmDDc)uK4} z84oQKnATR+R8!<{`UcJi`UEH__>Q&(GdFZ=NF2PfbIR0!n?7@Yu3D-YH{1a#kOVf^ z>|9mSUY)<4n3;MTRwn|5@7pp|P%ybKDb%WO_Yz2QFle{Ep3D0iE>0dfK_lpae%hMgJe(dqB z(EeLpM2)8__Bv+y=F5Va*!bwK!qUi#?AJsRX|s19ZM5+}$OOdFfRR@Yr^<2qJfv}@ z0~EY9zXQJg;HxD?Vcu8)13c3TD9!0_PPqBZ(mHsq^xj_oC>^|W*l37bk@fLR8o@5CUHEQ`SfTA~5t8!#&@_sVl?*Z!fQwSG>wL1};NL#_7ukohsUQ>p5W6F=|4MhH^aV8yffI%OeTL=S*8(gr_`ChdLq2+=wOWI+bvrn)|@jkGcH;h_*%@r zrV9kSE28InKYaLbqX&C8UUAQ`aM)E**A4c{CzG%G`1u@Apv>=RJ}a{CnWz3X1e*qR zagCa04PqO}rnczDucR@FxjEKx{g$E`HT$a|0T7eBOge%bJ>ftx2;+y}AaC3`K|o@q zg{ugKaeXp-#3R8?n?Rc#dHcJ{vE+;1nd7!01s**=UqO(|uUwOck_bb)xm2ABPz_q( z_V3?%(hMs2t8;Y&#$WyS_+IJDPRFL79)>hrn{?+rQ+uVWw`XRs} z*>`)UZ5_S)H+WjZq$=#B8QQcuT5o$unuSc)d{I{8!Y>j=8yk|MGHNt_Sm$|IDewg7t(@ENS6&LAivD8jpw~0slOLb% z_;|Yz^wm3BcfCp}0zCaLqj>$7(ye2A;jvq#Tv+d}_bqr!i{|`tO%s z?_1ab-M}-Ewu;MJ6N3)Lv;R3QB$~YM_2w9>x5X$kGwFr!% z_;&XwEqMAL_yu(C3(_q?^6Fj7bz96vCHtiQXON0XW?$lntdEmOq@QV2)D>zWtwirZ z5FzIMFBERgpU57rS1mWHnw;x!wHT;J7fJu%LKo1vYmL-apOdC^O-&~Pxr;& zI9y%-AAFrqKY16V*7Wn99#%mPIno7227dE-E|ytnbzztUg5z3a>VaJtsN(;|AuvZV z6onthNzp)WZlw7OLT9=y5MB3nb=*tX3AJ4gqWo}Jw+yIa$psmi6f8i8^JCzXB3_#n z-WiAq=S|a6LIa0$M~8W`iXM*j zbW{2J2^u9;H8uMc4~Z6097h+;9_YJ7<82Tp(PJId5l&v~y#x(4=HMW+>GrP?d%mjP z0cqrk^7|RM!iR6vv3(vW?Yoas@B4#&l&IZSm{ffH;}anw_5i2ORmJV1R&29#d0u<9 z&#P@uQ?M=T=Bis7$oa*kAr;u=<1-BqguPD9=ms3h0ESoQq?OU{*H;Qley~EBz(Bf* zikZ9{f)*nrcGvwGPwLYBJ(tczkD%sT_Syn0p*1J+d&e2={aH8H>NZq$!A&BA%W4f& zb3?B8XcpX6%;et?c!=&2J>Oy-JKoWnU2oXMi9+ATE{xS9oOBj0VLROhG7i&D9y{Cf zA!y=USP#&9_y6Gh&B-M!_}Ee8^Fynf*>)u-lcm@kK6`w;E`q(0jSS{k_H(;@{pHer z$dRlyta6_pD`5&M^7(==^XvJCGWo`D96~h@BmBc}Yd~=xP*JeZRaIp8&E1n;j1KFDpckv;&3Q6DPyZ;;w&rnr!vt3U&mu zVPg3e|D-|1VM^rSv0bVe#w@aj)mmJKad+KB-AVl)F)w~ANVIe%)z1D4iH#mbujeR+ zw(^xCpVoQJ9cNFQ7~P`JSVzY3JTYs#T0MKdV1hZ6Fj1U5(X@`nkk%mq932=Mn z_x4zx6;7za+?Udd3&%#?PAT6Q1`V$()wbP=A(omB$J=nPOxc#8%@u7qnNzT8k;qeN zTkf_I3=a)e-kkgU?PHCJ-RcM0$<9h_@)YoJL1Y|apc(yFK4foRL3$@gC 
z;PNMs!+7Uy*I*7G+sakWVfP+gr9Kry3nDdpm!=!&x+cZy_^GMkUczbr54YJ!D=VwD zwKb6~L6AahvSP<>+mPas@vI}6_knYDQ6mKVx5)7KN*!%gj9}#SUX$T3+#DTiByoRA zP;qE_Kh>nDxMxij>%}>Y&fqZ@{O#Y8rr?Ps)0}!Q5x=JM%=;&jU-aI~Z)F*5zSb5T ztyh8`X4??daNyh=Aook#36|&spymgmV-fV}cbTQHU+T(cUypQXiVR^jut`rnM88(? zo+jlcQSaO4=a=Wtek1H^8dmnH=Z$5`FcAafnNeSO+-(uwF_YjU-{)H*3C-LCrnpt^ z{ndP%iIr822*aix8FafPe)37YFvyc+F4@=X`=$`O-Fv>hu<|0-AVa-jtu*zB1Gjq* zhD{14THjUy_2g8`hYcieKY+v^L)%l=pOD>Z{iX$%ngzq2*gZ*&r32q=Pb_-Th#nzt zymKzcxR6kkeIkr)%1gIkJlnhhqX!xk6{>B`ziuC&{)^EYWTAO5j3udz?h-xW`;lV2 zCmFdI^}vlp{!Aii^U2dZ=-hIpd>{L`C=6*HmxGV9^|l-2?iW9H*3yDUk*5?l!_hCm z;}BwU?{xwB-)a|P^JaqH=G4z&7wiz3pw$f87H?7~|~2N+(1vEyHD%?fFkZ z%Kx{8Zk2!a`7`T)ABZn%;dVoUbHM9~3HKgwln&6!>VuIq@IN|08ng$E!jBpC^I&nW z9iU!8*4V3{<@a|wdomt5lUP^NAbO@`*FlGoK2a+YkbC!bhM?u7*Or;Iezq<1vyYEq zi6=1s8O}4XlWLs!lfIw_bM+Fx0wJ+}&iv}ZApfZrF6cgvzJC8lg37-0ThAxW#D0YJ zzpOB9b32-jSYPTu*6asIo&QT|59td0&N~z%*1BSsL$H&mq|7{TBhD3OGJfYHruQcE z>$Y;R0{Hvw%%S-gzl+PPz0ptJH9W2Q`&p?LW>NS4i0&mXdKR@%R&f_3a&6XR;q%a3 zaf+sK?BKR!%?DJmQHRY@>CO;^q-~MCb@+?1eHXy-^ZycRz>+pjfW)xLfK7hAhdp!$<0YdxAz2bhdro+wID113iqI-K~)t3&9Txl&{aEO>%Cd9_J zypUe!dX5{giIjzdlr=VSxv9_&1CWq+ju%#Hge^X=yr8FZI#X#I0Kz z`JNrA3TZeR=zHR74mANB!<_&Rx`+?2fctiD-+};Q=9o>$OXkY}w)%f;y?0ns+4epj zhf(I*7-tYr1av$Q5fKFG%`%FIDj*>+ez?#?)^T${m0DnU~;n0-fOS&u6M2dzs`qV{BYIM$$ZvCiLT93i_Gvq zcenCow~4JX6?Xnvm-ZhxaNyXQ!j&SV`V{dR>wsaT5$E3*@BD!N&a!j_i;^;Y>hX*B8@{+_#qauaqDA&Vm5RoWW{&#(zc`N z8&)1~_IB9#k1X6EwnWR!vl0wfncf`%z4E?E0WUt+A!7)mCnGeAml_2ZF0=lb)V$TC;&ii)(jzmdYd|W44AXH2zgWT_$Lb?L@r=G14=m}%V9ljF23IVRC(p< ziUm8=c=1{LIgz!Lx)pgPgX@oZSbWhkw^tfvs45@3ZV$v64qJhyP;Haz6CpC1*WG+A zb*IaxjR(lkY}rtsSL2Fn^n%t$-J3{}&tf!Mze74H(fJTXhIV-ZfHPP88IX(88QF1IHirjRv&xFx1P- zb6gF_|G6i!_MrVh+44u8=3_P=dO%aD=bmOnR?}je7<1uE3wms$>!A!{X?r}PGx+EF zn;;7M7ACVGsRWsid?hr8n^DYL?kt2n680G`I|A3zQ)`WNP3ZsDSrQ%o`f=5|%BO8` zWUyE1>spdDo7N;cLShu|-0k?a*;<+bnm)a()qeTPV}+mhr}~qHQrOH{j&TI#oLSEDgurIYpw)MMHndMUGJ#(*nDjy3^O_sO4{GbCqP5*s9kIJx=LLbsO 
z@JcS+!M;^Pz*yZ?neT!tLwBDDsnNt9pbdG=;59$}$}9KX`=Sz_z2yLZcd=9*=nT?Y)H^$u)0R$Dhg@Eq`c+Ec z`*DVjHxt<{`>;j=BXHg^`^bWd71+oW@=1Pn;Aj_pIg%0z$>@4^U0o+iuwP5opn32A zF#eiu>7pqR6JhNqZakq_>>D`dTilskY2iVkt-+ofHGId{zF9Mh1(SGj63#|ZuJUyN z`~UQHPV`&(A?)rScX9P6ztW(M{plLAd*6K>-d(X zgHoK>ZSCzBD}Iq>>g@AFc7c^~e9}B>9tGT!dS&U&%sLV)mnp#vfp)=~(G*E|WxM_+ z?!=k8)rZHF4GEGDO2Ct5g^E%PX?CbE`MW`2@vEB2d8)>uR7&00o9N!?bxX0@&!HbC zY!)lcZwB?u+Ptbr?gzXyQ>lfl(yQ!fSr(86&w96WeB;(Fn^yM5Dk6)GLL^hP?9c!4 zaTettQ-&Z{^uUV~tdh_6S3a(Oj4pMvEbA~tE!|g*n&mr?rsU`Hdq4kMJcngVTG92H zr%n$Vc?sZV*hQK3|Gh zjMEd~bzjt0*HagG+fU;Wm^SxBklRqNzMC}q^|KhaVv@ZeAn@2pM^~@iSA4we({y}@ z>5eWyeRxUneTg(L@~1Ba%*I`S{@-BQQ8lF^tg5QE^03_`k{C|%>!@f>EGbgab(d1z zf%AUGmL)jU-|hwyfGh|5m4m{&jLv+BPe9}+$2)-yVq)*!*JW+_y^Od#p0QlCJ$O26 zCNPDY_=r{A{@<0Gv3mNjW=tB8NjB3Wx)R!-Nm!jUZP8@Oq-d?Lr;OB1-4`ohIaXL@ ztNLO&$ki4pTY6zZuT&r0VlY5QhQ`}Ri>vLwt@_nW-2T4vd(;wR!Sx8=&XY1J$e#p| zQI7RwZ&ot9Ukey7gt<<_t>xqTG+TH<`ahNX?0=OXxeHC=E2OX{BgZAELJgaS!}Cq@ z;cq1}Hl*$zTh>xMR53B*8e{c+tVAul?U5NWC2u-%dudiIu%W-Mx=zbFvv={H>vE@v zd1(zB?N6daN`tmdPP1!tfFF8as^~Rklia!*7LN7_{DRX}m`q}0+$P%iJQa+AH^jDO*BKGVIdVH4IP*Enk zC_dvs4TyeU{D^+P-ea@1abM}P2AygFK|W{~U<_DoHm+d*3ofIg35xoh8ejDw#q|O$ z7ZMY;&$GlU4g9-AoCKo}$<+$jsgh@tb<6*V^bPraF;DhEnzC3k`|_h1R?HC-nL4J4 z)A58lhflq}Ru#`c#;DbTv-p?hHD3&@{l@-@`hWl6Ld$m(@JwSg^W6WOoE{B4RsvU| zi3&|a>ktx6DMJJ7X6*H|M|ElUAi}%%5oOt)6_L|sfYH$P_AT;gibeL~kVyCch&(pi zb118WMCws28R*9!fPSgz9$BKKgCwH{VBlK3D#ExBQL}e?jO@XSwfi%BLj3SA9x`6hKSCtUr}!(=@}&)ekw2t2b6cHj3!@9=!? 
z%KQ?p#-(LK{~`wUjX`Z`>d{Ht_wM^J$ctS>6<6f!ap4t~=@ew&fIy z^XOZyWTbVD!KtJIR_oHH|F7lihnh=di8Eo8p7Y;f(W!@C5b0VV3&w^T7YBF2O9$pJ z?yXSg%hUZ<7%9k*{l~_q{-L5M{okpCxgkyZKzn<5_Bc*}a(TY}?2I8-oAk@cEy6R2|x@Y+_jPl|03q)YMc0_)|fyYRiS3o*ULB zSLPq=5IjC~#XI`RrJSBhvX_ii`7Mhci}S{LW-UhAB2kbEYp#e8tLE!oRl_G+keWI^ z3%;;=rswo|V_Vx%3e;asqPu}V<(eW%PXBIu0iHU)R1*ya1_tUzXs%3UvjHSreYmnX zW{G~6h$BCb&3uxB9zZD(i~ZlGPQYeR|H-%GU<0i}H+qSddqe+;gsn@mx-#6T?bW4z zV%vjHvm=b#<##5}{y*}tJ`@GpBvZ=ZUQ^)ovF&f{sT(B&1!`()F=k2j{M9np3^bap z`u2!2c=K^rI6q7E?Jxc}w6&XAzt>Fv zS^F@0Olp0{tBaU!XqU$=>qATV(wt@Nt@-YJL2*gmIsqY}Vrz|C?(;oaYj= zc`mvyuK7A(NSnFlm~EL)|CrGQP5^#*{6%Vb7Zln=XtPuqgMjVT>u z46sY`fH*D1Nt$FN22MSDP}(zgNze+p^?F@IM5O8fiSop4(ces|XohoUW$QD^tJa~N z$(fO@YpPf?*(%ZD!&Gzjn>9H3$=Kw?4r804t|if)qLyBghP={}qduX>Vx`Kh(VE;* zJ5gN$Pe@c0&!_{Z`~1WAWj+3>IGmc@C+7QBeyx@lrrdgKXZuipj{g<=Vgg@LGV|+P z`Dg&W0aMbl^4^JSs;uo6yJgyi-1D>CYJ8#l|rnH5eNOm}jQp=8g5b|zcoYb6I zwG67FrXr4Qacg%{4KAB_i{d1pMh)2|?~G-}&k=eAv?^!!mM}}6jt8pb?~sN0X$}DT z^bt37wtL>MfYnKmuA?F7&_%ZcqJKApE2|>aw9Y+hBcD4o{oaJ1vy6?SjAWk;v)6zn z#4Rp*KUS2@t7X!a1FXS8Bl3;Riw`1XJ-AQ<>qMCtoFOr-TUZgK`>ZI)|1|1my3K_L zsKWDThq*}FuDN*1M~IrGdO`T(bi9;%VzGJi9>ygRhs1=GYp;cajY>!jGr86kX){70 z@l~^$rrO*ku^VF!LLCN$$1ts!R_jRMVjef>9KLI18k;cY7rzxoYGWyu;zcq9__>GO z2kV&m)wP@;TDi5~gq!&gP!_cAU&7HtY|=b!A|$eB4f-mQ%WRL=ZbU*7%V?JcuJ zJjci%-t4&ed)pq^#(Z?c6BWD^drx~RL~QMn+3^}Rtqo?y^wiwV^&Q)cW@lTr@M&m@rXop6aVINsz{<@`y}!DRvv8+c0+q2l zu)7Y^J8Z*fbfCW!64UV*9=HqiJPv2CojR6$en(G)mIxNVORCEEd*SmE;XW_7>EV+q zaIc%yu<7hj>A2WY-9nF+m3--%1PMZa=~Y_%qSOQHiSl$`It3J5gg;8?Lwx3^zDam{ zh4*#r)8p6izfUz;wEUt`e^}wNM@LQGZcg&1Sc&WO>7CRv@H9h*(dx{xV{^g2HIp^W zuUdf;MUxti4{>V{+dv`kS`h7fDt>^7)!ZUbvFhoYh4B8AX9>e~OT86= zS>9H}<_V0ETkAoj+kTsr@HW`ri~OFtu)}CT{x={?FiJHO`B})kSE0P)UN_ij1XuqT z7ZnK%+G?wJoRsG&HOr98$(fo9W$%LoNl1H#>cjMXp`;Rvqtkm`Tub}xc(wb$W$$3cv67!fv(8VRmA3?xZ?^q)_UBsY*uQ#BL5MCy?(Hsf?8U1D z{MEc_fVP+%i@)gTQ@7Tm@5s<8)JBElTWb_CY;moIJf!e;C>OHL(V|wsK|Pdm&dFi_ zr=ce)O$xhzT68p#G6Ow;RC2Lb3nH(r>`l#4y0I12+vwdxf3sp}Pb#wTcSL;M{H~7W 
z!7sC=H{B+UkK(qwSq|~cI}a4Q#Hshw+^(12>__t~%gjW0gf{jk@hPYhIHy8LfV#^5 zvG?wkXtf$vhUR92Tyee3tff2%HYCs8F)ZTyD5Fj;W(ofL^DDOUxP?YQ%xl=3AgZV9>9ND}k+&tf zs*dHLbEt#Pn&!Yq6UIkm_O$_hrW$zgg&hXH&$a1uPFhtnLiPi`we+Nb zNwv`|)orrto5<@GZu24pH}4PuO-hh%pm=eTeTfGSC;G&6-<5BlTp!%7?lOsFwHh!V zC^|v?i4Hg3?Fd50LUw6L3N#!c{)tkT1~&q+IYhial2Q$gy3>YApxJnJCHA7QhpdmZ zE|7Z#?5d>90`s))Z>9xO*au*WnCGw|JUKuWE6GECQFwA-d5Xteg!M*nsAe{|q9~`m zbF@-gSS=>c@|H3RI=wU> zztY=UA{lxcm^4>8|EnnAb`Lt_Z;Wlt(VYOhRbIPs3P^xv3f%CPs4m+1^=_{~949@RJjs8s|`#E>=a?0v={|_|NrIF2y(w`2QRne2)L| z!!aAcN+E&{Z3z;)P?$Xc(4p~n6Ng#>r#k6uEXyeLZc4wTOnaP>)<`1c8h?QPTJGtt zebvQyIQcO}cX7>JiL_pXwgNFh`C?k<;~g9|#zRlaD074ryioApj(t!{Hg!WYM34bt zE!tY`<(|mZ(6QtYWB!A~CT@v;JFWHZBKHz3dwNc|5GNC}<$b7cST z#5yY>7ibgNXF>VoEk<9NqZz*6>91yWJRl1XCbYIKl#S-@oO%ycWilUJ8I7l;t*j?6 z9(eQ}VmgOze{H`>9^_4%V6^5|R=r4o%J zh*yYX5vj$Qzon>e6d@i1*GDy+W0FaO8@I>V{|UzV&IB}|s1_ljbl$jGeb0-bt{$aj2m5Y+jp8pD)qvNZ zJ1lgrY{ThU@d!S(O?h6;3n&+x39ujA-9%P_5zU0LhQyRCHp|4mYX$1t<+1eQPB&O4002m}#SGUXebj8X<}jDz3#Sif~jSlXC zV5*G7cuEwA&q!A2z(HaaL^424;d}$yDRM4*E7&7UjG#|@1w|Nmw6!4Frzi3aX-~i- zW6C53%0DDIxMJg4I$(da3<}G||HcJ>W2cBt*}HC1qlB(=&4rN64R~Fp@qs{rZl{GC zKw*~U2TdYB=L-EZS9Sk(tF1UGFjVfTzs`iC#I*i5(OVX;4UHqWwCOzYec71|k#tb8 zUWRQHLTN?&yU^APuasK^btW=0lw(FeLWj8%#o7C7C0eq_AK{XCOGs3*|O8}V4UBi z-8GR@AW%8rBq;zIUE+d$e9s2DI-Ubnvp})xO43;?t-TWLeJ^mA^S_A>8%HuC_c8*G zV9m1~HNGfoTih({84&v0bFH)^)E{W=(~3GB8VaBbm3)T<25_>CjuR)Rnr}$)N|1sA zH{SCX911Jn%3JN+(88zeZs9v&ZR#z6JD;6?~r%ln~ts6rAOjcILz z3r$c{$?bRI_a68mpJCca6y@J5>-li>ZaH6Fw80WVU#ulYQP$vg-NJOX54PO*B+ZEn z;%w*Xc*va%?l3TF>?O3v(jZpG@~?odaWU@;ptrv1Z|3t@@P$=P0xme7#fIjBSZvie z4ey5K%CMnPh-ntM@e+!OY)y`QetD`85>!+<&VCXYr=(hS5s$WpFbEi7>qN>t?5+82G&>`I#P>m4YdIxxta*lIs}H=rI*8kSFkJghxpoP0Ablavo$>9dtw%38PTLYV{l%~+FaCj~iQODGB9s*Nx=_V0% zy{}wl_ZJc9qFbTAC9!lTRs>gbT|8R@P55aiEvF_vL3R1m4L4LQx-~g*jOR~tpw!9Z zKkU%_)f@4)UAH`d;i*B9xhIs7(-Vr_N}?4Yq*}0uqkP!w^z2@H^$NafgO*|2KI+{& z)Q_+E`o`3Ba*N$R?EIu``T5CnML(K&olY|4`1!$=9pV*oSOl7Q$peJSPuHyCDDv?7 
z3Alf3KOaQ8gf`=ueq{TRSc)$!pb(cZOsKQihrCZ9C&o_P`%}#D)rHo1N!o75D}ZjK zqKYl(hyRN$C14bf5KOELzm`Z4Gx4NAwE6niPSI_?r$T3?Goo6DqA4=uxoC>kmB)Xd z7@H9{?^t8*svDr!<+Jn+fhLH347?VCb&Cm5eEK1qH}!l{t;vSIW1M;~D~?jQJI4cCG*HZA$iUp1k1?F1GvQzWd*7E6NJ zHZMIiWVZRZ)B?#GK;btHXmgP=+r7?&^1;?Alo^SRAEcG}YkuhOWOXTw1lNKI6hREA z0&1Mx@YmKDM&8gj(C_KxqT@`UKDOYTOOm%S@(3irS9#@fHiYF^tj@{IzUMTW1J3r* zA~JBcozR;(Y?E1kxzy(uc@QAS1K;4GF<7q;4nDe}zdh~IpK$CWWM+KheJXlV8dCzt z2c{*1_yuNIwFAMu;G4h%7*F=jLj9zmWhDH zH+c2*eInEXj1Xhcwu=&L2-B(|zLRlke_#bOtO7k+D%#ITyPPXj8!Uk3_bVYfWLdBe-5duo*<%ytjBp7Yba?uK$%wHrz>+y8h0em6{` zL?R-^b~N51nc^b3J!rvQuIglCorpO&DP8Q7@?M)fhd#eh3Y1F?JQC^PjSBW51mU=N zO8v?>gYPqwvySKAoYSgQ8C0~j^$t5R*=)If6LZnj#FLR_$pf|b}4m#iP3xcRNE zB~WHj-p!2REV}+LI6s%VftUbo4Wo6sg>vwy^S1a;?mC6UK;$&=?d^v836nKt8bOmx z;YxXGZ*~!g9|kJRGgA*&u@Zv&Hk&Z z%URoIYhCw+eeJJMhqVb)6M%%OM(3CNpJ#j*QD02yRzZe@aPQlDmC6YpH~2V8Xo?-5 zfRXp|C}>Nd4k*MNHksWYQ~I>#L+$HpzfR^G8oiar!$n(RAfdccW2E;ZaPpY#FXa_1z5M5=?Kb4sB)9$a%AGG$bH0SX3|~fS_J#GO zpD1YgRISLDwW~nFX%u4M`<360_hsG|^D{utCLC~OzC<5G!yxQpL*_Z;0QpAOwQ;k1 z&)3wQ)}ORU7|v9?#KaZ(xN8opN-5<}SGzTixeS*2%zJf@uX|qa0ib}wBY|W>@u{`t z?zkcEQRyO|?@oJ!mE04n5%-pNF0f|@PgzG|q5Hd+nhE9C<~#SzmgXk*mU+yksZr~$ z_udMRf?UgTW6frBypLHE4Lg?d^~pXIAu?WnNk$bMq|`GF=NlhzWM6F)$jhU30p}TD zwlkG0iv0eWmycw9cu>4znMfJq7Yh)(`7DLHFFVry=BZ?hMBS^ zxK@yPeI@rqNrumL_v*1^D_;;1V)kNn#N8^6b)mmM646>?=oZzK$-uN#bFo+pdWLzT z^W5?$%i}Lp&j7M~EA=%ELqUqAFmDZFa5pVj{W z-3>0h`el|4rPjZcw%jc*z^5~y}&0LGWuF7t=0~SyxmHr2JIqU!8 zu5bN?k4l&lIT593BYT~I)(igd@)))e_%nS{T%{Ikb%Wb=jJ6d&cW~#RR^iMcNJjrj z8PJ;mrHUInd$Rzh6`}p+nMVXF6|g9Mj)>xfu6FrV%DLjVZsl2Qjf+lJG06!(A)y!7 zyQamm9wx7nrZZE=pRMgH%Jg$v>8}m+XDdx)u#B-1n3W(`Gm9)$RpR}vS4hhY+Q|a_ z;qGa^wKc7ZmeGwh{ffHJ+Eak+?1iR>tQK056D?Duq~@O~C}u1Nemwkz>eTSp@))&N zb)&eY*8lx+4SMOuw>NYF^CaHje`S97hyGu7q>Bc!??9j|Iq@PuFK;bE73!D)oSaBm zg`}haedOZQ1>+pmpX-%BQCmK|l0HW=h${k^SPr>b`P`ijZ+%BK^d?-GfenRRKLsKl z^`rDJkQyE&J7Z#6lO8*q)}RWTME;~8r-HdJhW#I&1eRS3ySi^xW8expenrsfpBV!h z63S22NJ*i%iWe2=$SKB8fFP@{{@x_JV+gwut5Ek8@n4TZ`8cM@CzhN3y6+kYT;eia 
zvX?36Hj#n$3`k3CY)*H!tZh9o$R9-veBE+Z5ExOyq7i8XyiRL=9qi{nC%Q3008;qKu5_oXMRd*US9tmWE5}g;N zY0FF_^E%eHWG&9>?R>~xz4K=X?280(r_{2=D-h4WiZ-fh6^ICm=ooz>dXxKkb%Wyf zuBuC5Th87FU9VoeoO%e4n9H&5LlK6BxteTdW7yT2nwsgvp(tz^2)Uf2-pq`Ft*vbj zAfN~TZC?fP`SgQVncY@&|vIlYC`npQwgwS>E7^Uzr&oi5vCEf0|uHqS8Px zRi!xbtV00W%=7Nywz>>{7Mna45?N->sU#9&Ea-W5{_2zhoM_ZdeO2i#xU6$j&QZxNn!xn+{YMArnK~cTUWrcMtj4xDsTEHGmb3B^*Xqd z(3D;$s#M(U1-YEsx(w_D07Z~pi)(yQtr~|gZ^`X{8|WF;s>hC_da$P>%Z`7v$h^4MQAKiNv+*?$u(Dj6J z9k{dZZgPWBX-WWKwFKcyH^HfAvSpQs@{mD2ahy1@IsFL1CW#UP3-HPHJ#k>H3^hq~ z>|#F({GzRsI5PqAh5E}J77Q162_4E&%$u*CzB8DCh{u_+VU(V*Ew+RgproVzqr%r! z=TFSv*5B!E-i`k#O*Oj@arz<_DR5Q4mr-4PaAAhS~EpA@!ZS!_!4YMbc_C|5mND zuMT4EF10TjdZC9Vkz1moR$JNd9(Dp`QpzDldUg4v3S4^;8Hs=_F3BORNPexND*`)A zIZp-F00bu}+KfCNTWb)*S7eC){j^b_pwhL;RCm3Ye%y7^tpKDbes2%k#$X;3!__zJ zQrC)4PCE?e-Q}FIQ^K1zHW{`_pqQ9)s7H3)t5^Rb+GZ2lZtBtg=HKCE-RaLpd(ls)MC7SDAO|9Fd(wNn!v^ z5o9wOk4)=<(_TadD1nd9n$KC(O`NDE-d|Iz@8;P`kWe{@J%>7u#;HkQE#zF7#)KFO zKhUq@PQa;u6{Hr)po6nc0&svLJJhcK*gLiQ0*~?hNd4lu>f!gnHdynqg9%D7=TfEb zKL<__a$lL10a10ww;o>^WHUC2=+H2GZ+!I06(MMKMeW8STG0=iiGwt*EgF#V1FC-7 z`dV^3DO|_X4$TW`9e(UEYIg4@(TWB}&`Pw-`XXL{Fl1F#R8*8Yql^m+|IbdHpVV&4 z=LT9ra$sWY?7^yK563{+MFOwl<}XBZaTWK!ngje%>-)Ec@}Uw)L{f$sXPpu!+(+bm z5GNa;4KZ0O>wwAm{GVg>vU4iF5hFDd3nkCg=nMJm6k5iIyL9$ii@FDfuzo<0O=ma? 
zbG|?xw(U8@7Wk)#{%XKq#0{%aM{x^txxNHSF~G6=cFBP2iP(dZ3#Ax(x`n!8cO-1S zzp-B!BsC@GljNNi0C#GA!N!|P(WTZp_BOSZXpFA3DjlJ2aW;G;kVq(&1(ex8dtZEf znfV3q5P@AR<+k*DTB9Ka8ZM1QQfQ$?oa|}+pT(uvIKU^;_nvNg%7jbBEutn0IQj=X zLP;7s0k5lKYVyl>7eJpOwZBn5q33l&tgogIsB55Py)E|2cSaqx+VdT6kPq!REXta2 zIm_)(gw`}v%hT@z)e%UuXWdd_170+glrX= z_M>#+l|48D-sB3`PRfX=EzS1j z*>;t9jw3Z1uxEpa`P@nGf#P{U4WNc5I28<)d!46JN`}oxtDR@r(XW)!$Y58W`E0ox zmg1+hB%M-tmUnvSwIHCDho5h}|EN8c{c^bfEbG!8XjGrZ$`0ZDdFL%-)S5ih&xt1# zKt>2v&!}y?>Dg+CN0>PPOe5XIhPnW^6nHIJMl`mS0Y)oa1rFDx(IF|`IE!ciW($a0 zbUp_FQ&|4_9qme2VuP%5yn(Q)WyqKCMs&qFJcuySIv+SYE42P5y^}Wvf@j;b%OCSCUbiQ% zxLNGEh^n7B=V54kEYjR($P_Nct7u?ak*yjEn9bSS?wq;WT?65_4A~1$0_r8I6)>v) z1emLiVF+s=yF9Zu=793X`@L#chKjB=(iW-+es)*Txw^jBQci71e7{@-eo>=qNt#t_Z8LM(57Z|mH9|#D=zr6t5;C>lsbnUVYk_GMQGF~ zXLQ8Yh4aVY4@FtE!+j153sCd!Z?R&AM#)84*idm0>w5ZWvN8V-Zz!&l5=+qaJcC|A zk;Yx38##~+!86Qb@~qg(`>rz3Sw-^8!(HLJDQk01cx(Up61)VnD!V8wt*>r<(O-QU zHjGbHT-BJJY=ITcMXo_f0W^$As9ReqgH$SKjvb!9F5}-{?W8`wPi143z9Eg3X4Avc z7}b*+ylu%E{$<6Jj4PJXN7(3yEE1Zp*9rJev&EAgp&&1N^Lz*^vJQ|vty1Z!=O+QQ z0frjan$#~ZhC;T6o-U~1rV6)HXDH}VL+P)`g`>icEYKT(1-6&eLc<# zdZ7GDejTY6nJv#3rqAZeW&zAE8gdlnX z#B5pVRg9LgUH`-waLoa%$NY+kcwXPZZ~l1})9j0l>i>M@kLtr_e6-~O2#*J?+??{Z zZw(8qY9G4Jl3xm#xvy_o7FA>ZulnU)=hw_#(x54^Y^j2()(?PVvqjuEVxoa+2mmo|0U2lI;F{xr^~LV4VmQZQr zZ2kd_4U2>nwT-{_4#)Wd&Lu$OGQ9Nf&LfmoKmg3^JAmA;933hC0$w*6vM8z zx8;VoxTo_P$FW23MCOsTkD70XCLrx@2E+C1a>|S)nyW{f0_%}YaQiY$4Pv|o+(1T( z|3XDd79jlkx4GI7J6&yA17ci?X3>gF;IyvH+|<f_mQ|-90@>W|@uB51lNQ-ky z?sp$`h2`Wq{!Q@TZZOJ&dP!J3rj3a{JrglQ>YiwB8+2-OFY@8oXW&0>u~oVfDT;sq zd+_nKz~@gP zFd-vlHK#z`4^*#FZ4f`|-bpVXpOYKsaA8VayaDr9e!~T8nd=sw;FFo%tfve6A+ibF z+iGJ^^%mH~f6;Z%S8aOgGdh`Ns{L^faA2!JIf+WO!$NNmi!OiL!;paD!}Lyar?)fy z3FQs616@6cW$HL?)QbO&1eX?HooxyiccoGdI<48ENL&4wXo}#^Ko<~2ZF=SNvJbMq ze2T@LhpZ2{_Wh41wt1jzqh7h^Nt&7WFTo!5qcOPgx83`Qv=_|_0JuNx?Hv)~N!!aFE-!h<2DgC{G5ZQ?**sfT6JinQaDnpEPKYH_kGi z(ib5s#eOF4eGo7+c+5jGe=!+YC0?P-=sa7+#nQdLtu|HH=k2I_f2w!pBoa40>R=-- zxxnsbrE23(Phd~IO{Wh6txyxAL0Z%as<`&Jq6dy-d}w0n=J_LX&WaQ 
z6@O|;<^-O0AZRarc>W+3hQeiJ~m>*msFUKk)wp^Zm5I(ML_ z3gl5K{*s}jZA8~KA2+tkqV{%Z3@X_F%8N7k`ae&oU`DVH=q!s?6o5vdkU7A1PQ}@; zkG<8WkCs#CLDpfoo1HvAfIDwvz82Vy`hQGKxS4EY@A!Sl}6 zpFU8ub|WVCO*H=@#^ge@lM@g3sl;Yw(!y5fRo3WTNe)IN~ZsM@RY@%@%9>N zY3n1!H8<}~2u$rVI+A`7-0g6uv-9G>w#xTkJX_LFVVy6x6`now8-C=s0@~4*0R=|H zKDEN!#=`UK4$}<(WNMuJXrc;T=M@FJJy8u_<i-s=CUsU{vY~T4(pY{FuhnOH1%VDQB6rW6G?PgXkiJs$ zthV<1#b++cNc!#rQ{`!@tG2L>rGtRomCLqX!7k4eDdyyp5b5QoO$3fTN1Xg{U=PnH z<Xnjmk~>|uPm6ksU9P6UK6HtjG^YAU_A%EhNj@|l4Ry#I{pFsp9?HYJ z+sf$HZiAYYOWTk3;z6??Z-J;eGnvfMCNNPk%k^b`n{R%mjA;Yi!c!HXB+856tMK zmroYKBL~to*q@dajx=U|n!GIRWJRXzg_j&b*IE{Nh+X&)er4+>9P{aWA?V3m9?k)}CJAORIq;mFiZKWG8m7o!)Fwxy1X{gAHYyFAq z4+Kkcb^OyMcLPrwG|lULQ$SF{ag|`OS%3AF&24SUlI5t(U?`txL>nf0w(a<1YSk3Q%F&&#VTC0L+R>cQd92o{*KS@i7 z*_c5llx)`Nyey(!;0KHQD?=i-u}F{Lh_3Y{m#O`Io7d^!t{n29Lir-iSNgq&@1iYS^8mJsGzIWX;5?;l5x^Abp8 zmK3K6T)oRg>CBfQsNYGt(#iaDxcfhu}(KnvH+#Xj(gcFgx{pD$zGq}7h!++(@AraLkJPo~9En!W$rXgMv$%TE?h8QJN4x+`L z6cYRgTV*JF{8(8NbB@&xc6m`d#r^ zw}VEaJGkzohJuHnHR@H#&ZELjhb%! z$wbtCr2gOBd%D2k;`eKi4{9eoSpGpCxIP|X`+caPO+TIZuwAY@DPm-BS;jZe1v#y4 zNWCt``dh0a^?!}{h0d6?t8f3G_gC%(R|Ihu(B#urSkqRdP8~zSr}t?7u3ahX3tGo@3+L>$zTtiOk)#Jd6<;@s zH-2X~kx@{2$lF=~4{wu~!GZq@qR zOH9vFRMT&s$0BxN_47-W2gd!E{Q;Nu%hwO2FEiD~OX-OZI->_8;@130R_`nx8}D{J z$OCWdisb2u^6T{Rl2%SSvH&i8jg8I!Vc*!zE5NBP1)1$Uc-upeoUMoWfNd1SIL+yT^N*(E=D=dUYKa2z4;lw6o@Uxf&<@An_RUXaLgmdh=7I@l*p znm93eKZr089)3T1>wJ#Ql4%FX8bfZR#S^pDN`|^B9lD;fu&?*R>&WbJV{faJv(qr5 zJoUZj! 
zrZ|fpJ)RvGYt6P6daYk4b^llCvBbMrUn_DzX+Z$#;u*;FIW9)>PYYZI?4JRDH} zr=op-p0(CL73+K2lM*YhHfj)!Z5w*D{rL2y--V%1>gwqD^7Chprk^^zm#0m{C(s); zJV11O>W6@~MA7*>r5E?jAGvNjv!~Cb%w@ER#77%?;=@KBA>17AN7h(%U}OpdSRdSbonBXcK;+LwgBX6!jp(g@TNo+lDzgY~S z_b-mtufOSO4esFTGzZOHz;G;XLDgljN{FC)et7PhAxh64|F)f!uXBAz_^y&)Hr+*> zY|_r1yQ3B7NVi9L=J}4@zTJ_*OklWKyeuO65?RldNb?UEC#hQ1$jIyD@Cu}XYHpq_ zg-oSW>oT7O88F{gMCD0Ux<u=g{5IR6Y7r0`qo{#lXT-!qKg7#nmZ9TvV6 zQz!!)8N4|+`5a}J$?YkPtb7tvNyt9_n_-C$&xRy`2K`6X_2f*)?B_Qpwk;# zuCK;2-7F%R(=DbAwGua6mZ>K>D=ci|mCfzX@Pf0*fJt=IIGl@(No1--wH+@7t|Krl zmqn11SYT@FU^J)$y$REXJw$!0L32*d2;N8eCe^#RM(eWa8u`#wl>=H;e=cxeODG}? z-J)mDi>L|CsTRAgk2LsCGWd3P>w6`=rrAw@u1T2CaA#Jg3M&`fw6JIxOXfkE1sQV} z(vN1~VcSa2tNDhVPbhXe(-%}kdhRBLNSb$;E5>R@rYX4{7I=B7_zRzF)S5E2^$x_F z;V@siQ=-TCS=KFmO_l4e__~#%Jz^K3hkr+0rZ1Jw9B*`$Otf9oXp-h03Ohw>xj?DD@mUJd~EUaFB?S`x5EGR*k(^D$fcT#ew3< zhm7!b&J+Tb=m^m+7n!e|Y1CxN*iI&>+Qs~(cJQ`KIccpwhc#PH^&Cmm7Be|`+sWtx zV$|iSviSG>fdp29*r3^FCzs0&!zcB9`7RAWx^7pTd!k@H`9;;u=S3N*q=i0JvwdU5 zOU2Sn^Ja1-MRr4d4=VSK7c<3UOG>?s7STk3MIM%U;c)V$dPM9icfDzET;jU1Sk=KF zKSnwci?wMzqyA#FeR>~;4K)0ZrkYls+^|1oQ)IO-spliwGs{;<`qRB3D{2wV&Nx*s zY8aN7V$3KY$gdLm6ZK9tjVumTDD0H^pr9|ErZ*tf@ik;3?9AvThvsYbM=tHaHD37c z>S7Ty#NW|-r}Vp}m&-uJrVPIz$aa5q)Exm|WfVnMA+L`dbLTdYsPB~?duU+(vgfIF zV~FMU90vVCa9uvcs$5!-=ySU~J^Vn`?s%r%^Sq93(yJ>G+h`Enku9h5zogj{Yo&h;qCq~L4g$Uaw{kC8jW zq=mZOJ!VxM%Pls)@eEQy+@5PPKjST~hz*E}!r&Eg|v11TEghb=3Y0a)pj&&Y<4pC z158Bw_w1ks$BVMPcNbrhrq@?+CPMov)r^y0O1m$R5k3PIn4zmv^f9St3G+&h3e7Xj=VY;-QE2=37+oLaeKnz zO(;H2<9`D<#z3X?jx*xO`naO07nM9Ld-`+i)?`=H~|TSxsR(NLaf? zrs~ZUB>SB#8;I6}rn$@c8>LdI+1m2cdcH1(s=j}cic_T`eoaDViNzpEVD0W%A)uKr z?{d6o#e! 
z8lfG}sHl>Toc@p7ULDN3NA+8ApYQWxP0@0tuAF*BaT(uOlrRZ!sDI|pbC$4m1(oiQ z-GA1_+|ORM;dnxpiQy>)$Dyg_q9sL9EKW{%0h@t%rbFhe0kt{`-cO_`Eg(u+5wWDE_H z{3m&n2qhjU^#kP~e`0~}R?QPKy{^}9>I84BV9~iP5cn>)oZk#s>_JH5%gabWOCJrC zT;o`qmfe;gQtz%Yn*#3C=k#+jGx-o7pF2&kxTWQ$hj6hY$deqBp=e=fDlX$tW@?o| zr^!1g(^T9J_T_yP+-0+b}DzGyE9iLxv&osPEFfTtbcUQ4E-61gysTZK- z=5U3Eau@P9b5~Us0SkM>moHXppOigVp^YZ329~jtHqIjz7!CWFOcTql_||yjFDebB z^|RbEa+2$Iz9}0yfNwNm^7DDmOf&3k|Jooa)+Ozd{D)Gtl(>w!-ce!}uRXqmdiy(E zU;l0P)I`SGr!*6u{_;_ZCNeh=*z$3$SN1Hyg(knYIa!3Ta$YPp8<&=kG&_0gt}aA2 zn)v&u)#s++)exe}#!ULD(&Z-Et?4q&=E>Nloj9TS1bNBfmEsD!@Wm!#WU9EGt)v}Z zUR2(%%vw;u6yqWTZbulQjZOJzPUXwr?~X6N zt$VIk_H?Cso8lNDmlopC0L`42(E8MPM8m{clQCB~DyskVjo=HI^a0qu`(pz8~AKOZ#UEG8PAgRv@8}_>3+? z+(T|`n&8i;$kO8ma(B%wvo@+TX#qh-YA(_zt1B^eLaHwoh31OL#|}G6n*e; zRo*6*aOcFWhM|+f)Z-0m?cw*0^AdV50g%t^1gnVVfEWTh8=O17{4 zx+4dan9H}dMmKCDUdph>XJax4^-mvR1sDFb@n&ocNmY~}rv5C9H6b@RG-wkCB8vSf zwtF34rTSjA$e2UubG_~T7PnrxoKQ!1H$7QB+L&Ytf>>7W$fbbxi&ixio4Xfzv{x51 z4=ji5yr1gk;5>3K7APTu9a55Oo+TQxJwh!GFzJg&YdoDm43wt1Gd}kYV=qM?*C5X6 z=Kl=EjP=H2?y@Q1Vf_N>(=bR{5NIx4h~#PPz`;pYrQ_9P_ioMxV==(R+2sREQO*NC z4d_5ey8ib*<1fs_D2lzjku4Tpg&tr2&Jyod@gKOCwwo}1 zR)I9=tJ%`ORXWOZ(q}kRlWCE^Ivm<5mQ^PakFyOcNfDyG7b|!2U3?Un+$6wGfFrQ6a_2=4kxXLO%a-&%f$d+wuIF*Dhi+6g?N2=RmB9 ze~5AR$04O_6WnPujFX0NU0e6}Y z@H!j&HPpG}mb&-$H}KmJzOG28+P%{aY#4Q$oaUvL(bE&ZK2A_;CRjDKGc6F`x9e%LJF$(hWZHN-XEa+=i1f8!TCw_y~!8K2+=1f4n`jN(-b}5!QT6w4fUbLCk_2CG}4Z+3{J7OQpLK2R#tv;t~^h3a%fo2 zXpt%Ni=P*TH0CxUToMteXgCS8E5`?ohAo1?RhWAZ0Dd^!(`+3#;DICi{~9rz(6;{j zG9x+IypL?~!6z2&lWn%@wb_Tgt+}VjvA|_IK-jyOhtH;GVru67aP5-bi>NB_*GEFm$+hy}Y$_S<}sskU?WbDCS&foK=K_iA1*Arx(?1@sU-Qv=-;0EAsP2 zu?j^JIzvpA^z!griQ7Mxh4#<(R|$!}eFUHqsekPM@aWpQ zq?3GfitC^CiVLhdnX#P7rU)cs9a+8ONeujD=Sp|sXuE^DNuX@6u8$L_W^QApB?Yh? 
z5Du4qpO0uBOn=z-ESS5opC%#|YscZ1-{tb*nVTCgo^L!^@Tz6y_@y z^X*3Zy9?e-;M*HMrfNbL@D~PVVxUuU{qEJq`_57$~(wlLbopQ#@s@z#?Udw8>>v}AZ8+H=9iGAM zqRH8>vc$l%5$_UsLeI^={bj`Scx7q2Xhahy`2&pRP8h%Jmop`UWO@S;&Txx#k;A}m zLVEk4WpHI^YZEEB>SEDTQMkBQv}@k4$4GZzIeRg&dvi%fm(Y4eR*lFlNr7mNSbRM! zFHCNnRUop{D9Y{A=;!}VIxVQzCly)HPI%WHH8!OrmZ!tGAMWPzVddW_sXcPzbgrBB z<~b){Z8Yh`_(CjsUB{2{uyFKcOq5FVe2SHx#SRc+xQvJ>+*aDg+8)Pppl}qJt=~XR z92;M$U2IDILMXvCk#*o|E=D?5K)1ko|4Kd%4i{ zZy8UZPCT_fw}^~ymME_sm`z=st3Q%p^U9>8uXD+}WMleWb7EB*m9(6WROuTUo^Tx? z8xkfh9rZO?SW+l8+->Y#=bD6(@4}p2w(nG~sD%G!ZoQa=_qW4pTylgB=VGDm#5R@j zZ`{P<6oz@li~4taRX)wl$jzC$gOTz=d93X#jZKOL%1Zp!zf6>hk=mbb#Z;L-vtHP% zG&PLxN)e^TI4;&3#Uo*KvD40hKnVgzMPKS9R_}$eomRNm_DOK10m$%2clN&w_`(aX z1U-{UXY44vd)3)Ro8i*Az$FF3m)?55Dzy7V4PZ>!;=kp-j9nu7Xismi6fzm(kVt(# zai;yUi>@wobvy~(Q9Ws;hCWQb1Y%1!k$;b@-}Mby4%m>FyaCXHQ#}Q~&D8#l)Y^>0 zV*Yl8zMh_`6_aYnNT@*56B5u8Mo(H=Ppo}CShGDCl zmteDhhu&{*c2yIU?wfIY*IGSMylH~>9k$`WMt7w!{655FRHV7JoLTh3PpW@JKU~IH zt@OvHVqt;YgSoQt#O(5KMn6pPys~mE{fX)$9PZ4!tumI% zb^qD7yv>M7Teq@P|R>|oQoN=OmS;; z9Rotd=YlQJ(s$~Pqkoz@*TEYaXVN?d0-)0IOCurId!(37)B4RSsbYibW&;r{dL%i} zAHWIQ!t~p+oJooL3-{Pxe5$WEnSpCaq{DcgvAoH-~)8`jG( zFcgon>*%^B5)m3Y-jal@W4UNQr8V@pbL8#u8Fo4p@9NfyQ5IC&mN06_TDiO${(e=uf8_J3FcH#xK zp>x&Qz(=Yy4s1$12`}qn$sR}f{t$8c{CcQZG`Dp4L$Z6v#}L_pZzIIbjmM(7-`*q_ zbRai`Bao60S?0@`Q9|8jx8q}1uV9jg{}`cr27bH&3VWF$$QLdVZ2 z?7H)zi8$)ilWK4wR!m@}9S=lQWu-T;Pajc`qCt1oxw?2Mv|?>n*IKW^W)nTsb<9ce z?K195p-0`6jr??c1W2hBDpm!!?BU`9GyJlAhVcY};|HV%Q)YY$31Q9ySGQG#&ir%8 zFmFC!y1;^3P(X3%o!pF7!PPsQ95S6RW!(auTW*1sqLUe2F~)!#{|z-v730^mQhw@f z*COm=3TMRkg6co+I(q_~qu{E1*akwll4+i&nJ>-> zP9~>c5(l7|d|ZsEpcYed^9M(BsqQN{@BW2^nQlVBzWw5)DXAx+)egY9?rWaA`85s{ z3mw87tIqeEoetP@GVRecl`+)l;vf#+$$xz(lEhs#O;)YPW(gPcTegao*rfV_yt?QuWbuNq+5oVcMg8&HsMmB?MX{On9>C9Ut)2 z2zq74dcfCxobst{nIl>0Ttrv);02_vBBZlH4;=kI!iS0kVzXAhd_9$W=kqiLMeflf z5)~B_$E^Hl4HgkLqRnTpCSGh*@1^+9w)ZNMxCw>Ho^2?kM~hfr@0LGe-cIQ1>U~Ww z8&@G=B<`;=4oS@UB9Upb{6zV@gdTMv>zgPrf@e82D1MAY#1xSK!bxU*e_DSYu!4&R 
zWfY&L%^wEHT98pjc#0{g-?Ue2t<`B+dmfv>o{piNYah1d@~6X z#Kr$ zxgRGgkSlW8;F0RaMFs-IfBE7x=>Hr;Wu zY4ZlVxo~HmvVjed+TuKF!I4#bOID}~@F1$}Ys7q;cx7=dIHLfzsei0#7%y?sEvfA> z$qzECv}U|_$l_-WHs%KY5~lFi1A~@CoMQ{8&*7JpJPvD~ zM0D29&tPk;RXfmMLa&CkNuBr{IL>WTqt$z};lN|(AK;pj{cz2ylBb8$e-n04DJ{sD z!_QaEe;KID9k_HLVj(xJ>TS(3`EU1E(-*A_=U+U`9jLoYSo7BLd%2Z6(EJe8Z9m>^ zCTNqGB})m7CsJ3*8*{koY$-ig-4S8PkHcsLtp)7nX{Cy^0@PoYScW|W1b6XGEE$gD za(t4uTPHh1)K?$^sP0FXV^~diyUy(d5q61-Z4Y_sf(!R<1Ya?X>SNxIyB?__F996*WY0 z?fh2?-eqAlR&ym zoTew-E`ij;OYuT%B#F5`45Xn1WJQ*A1zTjP%d@F*d-$Er$~_!dPZC#>XE6vM(~0lw zfrF8nx&iC$-WH%f{eo;Z-s8Wymz^@LzPS8~-|ainZ9BvCMjE4r5aR#>(;9vF_59q> zmTUDYSj$}-z88cNwXo&iL(0!u2@l%=&~r_vB>AfoQ|EZ!uFA0{tM3BVE@{d z4)Hd!jsi;R%ToIx_VwWTZ{y4_yI)5&Ge=mpbrks1JLTUV;CTePW9brg(OpT_uAY78 z!Bk`IMlbMjjz4`TU&Ez6QSn(I%?AuV8LX^S;rhe_VIJ=Vf5*9`e#YdeS!IAGmFZ}) zZj9l})NvinUk{9tekAU|-B~@pSwVC~IETvi%(blp3x&pLh^C7uyFK7(I2s>Azk+T6 z_)r%BH_o)rwS+Y@Aro z@i`+R^k0?ZsZUd^!hOR8V+{H7cc-ubenM(Q zUinhG*I}Tsi9PP%?mMr&8wz+eNwVF#Z1`2QG-v)Am;S^f33T8oF=300zA8!=l$WKw za2-pCqIY*Lyu)){Mq*esKqLS`mv_BUJJ-waPu+5lZ`n`=!BH z5`dRWlxXsK{_k|nEim8uzi~xuXg>V_G9dlYo!zMvv=@Uj!|G~@$8X7k?+%;+iy*3V z9y2RiSA>ba!M$5jw2xEgkjORgvNV3tb-XMwCm&6(IrJTu4ksFpm*pH=GFn~t0C1gk znuaPMiVNB?KBs3_Fxj3j|G;9*<)Cf0n-y?4nXV+dcIN^QIwQ^o%f`RbB=^NyyP!#Y zc5{%y9vx|1dxGIveb5}T2Jn<_k4AN5C1JWbpmT8ZV_4OkyNe3w!{Zu%JnbZ|;=dCX z$K?wZUtcRTJJ=qNpo0!uI5bFjp%$-8bR)I>8?F<*EUv*gopE@g0;E*WEGm$daum#E zn_~@y%0y_gw>U<1Q-$PxO32&nbJ&(l0kCt$X}^1RU-MsTKm6$H&+Akf zre{K}^(!*&HO^AWF?K2v`E=c+_DPT5k({61M&73G`u)ES{rvRlf1&^7=8{h-psLtD z^@wkD_OfFYNUixRpPH`+M0@4DuRz$$SbLnKK86C^zS4*IalglR4^F(v*b zBSpL!mkrJH|- zR!Q`ly{RKlOLwd{b1?67cRIk)T9lbF(-bKe2k5M@ z&$!GBB~TwQ&diR93L>%{bqTUf#`BGJeUwMAOprFnmXi*;urP&B(p(NXlP*|xlN;da z#`y(5hlSV8(dtd0Pj7oX|JmKHc~-@;`Q7Y3yOwDY2>kSadl@piCZtrmOE>}@&2mmK z5eb=_F1bB$KEQpj9Hcx+O|$MK*(T(KKCP|1Nt7hZ%9$2+nwF5T@SW~^gMkIsKCui$ zB0k10d$&M}P&Dk;GsZ;Keq7qC%j4SH+o0|raxmOOYv3NTk{B^{;%9JlX%XoDBYZ#9 zb!(F26QLG39otAz39F!a6Pc!c|X_%IRPg3udTPvzf|M 
zZ7h}S&9lY$kQipVna4GA@|fI2xqDvuE7MapBRg?H(Lk&dL|`B-_mNI4U)+wtA9uZu`M&9`!~Z`8Y1()hmh$^~bWE~$+dZ8=xP zJJHedb!e}xe-^{I9-5FxiO#3S z-njH;!O6Hi3j6B#;u^fOq2|yzO~Xx`47m_F|9Lp^1rR?UYA&Z5oa*m#`TV+S?JlK6 zR~N14N8DF?%G}3iCjBO#jduGvA-?ry8;i`OE*8B$-gE25Ck>S3Y@$-2cU*+8WTL}U z^Qgpt!fM2(Q}$JyWv0D{_;&_g^3|$%EWsB1@+H|{oaoIqNN0YRr`6$0@GH|d$Hepc zbG7rI(<;``3T4`0y74e(p*i;NBC2b#Hvgn~b7nrlueocWZRW&ToP7i=)v%1~h^ zv2XUU?Iw1C=v&vFE7H9ur%cATJdn;7yQ5gA)QhXGx-T)q^i4bDM{eE(C||$`j`q5P zy~Q*TBL%;t*WY*?tN_g=39lw3|0@lViIJ6}{kzEZH&isXapQOBZZ0{v_&HAP6#F2= zt^n!i8?at5|0h`M<>L1Cu8w90ia|8QMjyPZMjgg7`%6fkN8;#CMI)MIrP4>f4dnd- zWh%64Nykl1$7J>maV|+#hu!svQhW>-wB0TA-CoefhuNKWPw^`Ts8E`5>0-X%V&uG6 z1)koGY>Czc31BTDB($Vu8VqS6^EbW3d;jqkT~5MaBu5>!N&E zUL6;mY%3@)1Dt~@n)!vaJ~FDLOjEAO_VqzgA1ikjY$4VdTCs(_iA%X!$EUle73{&%x_Xu1%hL`EW>=P=fl@%#{>Lh)_lk8TIm z-LB1ks{}*0xQJh~{dPW4@fNmtIKz1OdD*@_+=-;t;<-i&6;91U1DNqWg3!-$k+e;1 zP{?WGiI&@k`b*HVLCB8IMf}7Qi-3#!+`U4s6YZ!4ap3&h+>6%*yJM^`qXISAD_h3@ zW8{K}ld3`$cRGi6t65Wxq_~9@8s_HueFh7s=PGUHm{l)o2EP8&)o%iBwQ*Wo9V6tz zb8r!4Wki$;)7f((DVS)D9YKFXxoc2x zcmfa00EL9@PGVqpGG^pBqcB705w?yDN|A8nRLv8y~h?UI~a{LHoj0 zdu&$gW874n4i9(1n4O*&y<3GOjMuYHB zy+HRhd+-W^J}HWH4*JISZ?*L-=%Rg}tmkCOjT&wnl3`{3DMR0{3;zqU`*~h=O;lvG zt*{Aks{i5~wy< zI+|`isKxBfflRy*5S>}8Y5wB!E%*w?}cH)67= zw{j{G;bBtO84VVkAA4IDkpY;w%vjs0(yKhY%g~onWpKG9qWfZ@I?- z@Jfa-sAzZ-fE760Yt|Uv z*Bng($ulhTb(--hJ%QAfM3{{a+1c%3itW@86Y;5yxcQBm7xyDx2zYOeIw4z70Ag=e z>7GTJDopO__KAQy|Mx+k1}<-ZkH>0+njOSLT^R|8mKd;-=1Ht0u z6AIF_JhV}Z(Lx;1gVhQ?9CF~ z$Rf|EFNgf2`6Gtf@QBP4kQbvp#;Zxkl$`Ax)Qn7Dr4Xf%Uep?P_@9%rvrUFoj0o;| z3Zx&;Drh2EPJ>mI>rhropEx#hi-rn>ZA}?cS4WcF3qMz->N%`N7@BuqMRH%fcoFYf zcbtgkAsf?!Ab=EDe>-rgEiBNZ&6~Q`pKKMJm6({A4i)SNJW(2{$uA2VTU%RST)K40 zU2&EqM(fp8*;oRL2luX38j(@O47iPst6}+}RzYQ-TD2(u(X_mT*LRo|xlab}Y{n)p z;#wC@%&tgF4haLlg#Js%%@apXvo}yM2a$a&Mv=VkI?EJdcLWGhp+Xc6yJAa=u?coA z-MG))9z9O<0++lKv~czpiNh%%X|#X;e(T!DM`YuHo~VQ;0ZHu?iTF_XU>{FnzWaFd z$4@yX`WXgk%S@&+IKK+Rthnw1yZOP+&Z6@~HJP;i2M+L=i9|%E)NGm(HGmuz53VQ0 
zdUJiH_s8b;^FT49tMavaCwE_wV^`?r<|ctaZH-)ATeCZG;6M|w=(Nqkl!k2y=2bF# z9*7#|_95dl1uf?1?3&|?&)A4WXtbP(&`^U<@$6vreEITar>Y%1>$=)^cquX{{WNo- z9zrWgdx^Vntv|%AUC_0ljNUbhc=7fv$gxWbXk`p1>dBWagl6%CtF0`X6yJ`i3hb?P zC)39i5hdhPW8%3Dzyg;mvW%!sPA(R{BaKMXz<|^kmBm%uuP7Vw;v=7Ey5R*@-rVt~ z6Qg=%r*mKI2-^E`!t}Z0|8@R?`=Z#=w1=Iypcq>8blimaY3Am@Cy4E0KndgKr}aNj z{#4^H?K2_B1?|IrmWgt?yQP{3SNtrmC<57FWFrFYSy~LHC6u3IblaN}gk#S_EYD^{% zl1-kqbq*O+D&vEwAZdk%7=2b^K!M$TTqC5@Ed(bmS#A9Gvf%q6FVf*#3fx@?AF0ffkr!*Gt#kZ6F zUwVOD?E_M@Y&g9R_#Pt*2von`=|Oa82^=X5p7zwmw@TC^zo8(LX(p!2 z8W~{phhmO`gVh05HNTsgamG7v!9AT8en?tN{56l@!8`L}UG!*GuA_YSo4@Aw%w7cS z(*7R`6L`{$2c-HgYlE^<0+2$L$Glcl zuOGG@rpKg*BzBq=_gt9moIO>#K6I`?vlF#(=zlau*qFF%J zrVAlJl-P&&9r>vfI6gZ6@cf}5;j|KOuV#fcUtjyq5Hf=}bAMc?VKH%Q`0PgavZ~q( zs87q%=7lChhBYnc&t`=Tt(!wQRB?{>Z_LHfa+4s;_5nQmgiFA*2@4k z7P+WRFDiglUp7NgabC`Mn%t}FgZ!4b+L10u%-x!wUaad1$ouj;xjt!gGAWHh0dDjU zc9mk&U3n66ZS*?~n$S3qj;W)49_`xltoSWqxu0-3%xo*2-Z$Tt)do7Lj;nm5ZdjXp zp-$P`@d}}xI7{nok5!C=NkEeXR zL^I+)swid@7Ah$_Fd=<36@N+fQP7?bMCDlPR|X;hWweTqt*uvXEne4E&%YogEp4FT zrskg))>&woZ0KOy5?pd5;bnqTe}2ErvFg(4so2Pt00)Lj)Qi$bM?XhT zIm*_o6yT9`A>iWK5X-ZX2V}W%hi2mOVMN9fo(re~>?K-smdESvs3-!GDgX+F zj4t9F@3<)rM=X*Yslq=x%(pArEthy+am1p}eOxY&yY>BJ2*}C3qWo(Nk`7^pzA0Z& z&MgYTqq7Oce-+sp@LE}S3fQO*_}V>B$$|WGq1HwnBjQZlF5IzuJwl#Ugkt-_zmJL?==xUPfw4a{xY;&SjtZJo=(#4 z*E4y?sK3(U>IpxAZW3^2&tKL1Q7brs2t0OS`jvsU6&rS_oq(@HMg5WJW8Ue0NZ{_* zYW4hgW{R!2<@`4;UJyHrO^tbg2}{Ldqii?v;J5060g_4C%R&j)_r2yTy)hS@ z{Myb5hJ1Lw71{H-Hsk2Oe(*>(%Q%`+m5gN_xha0NVNA->TI!;8KiWGv31orV)*+!vtGEHa&u|y#~7Kn}?Wpn0(r*hPo(72I`$y8rK z5g;9R)!pTRhIQ24=`y8C(}Ylni9j5OfA9uwhPE@y5RmzBN&8DY{hJ&2mAyAsgw942 zTrgVEL(c=ZgRHKJzKp74SS|-=nP<-h5%=@<19*Ai&V5cuVgjAWLCs*X4ndxV;tTY} z7Iqv9xqEX?CCf`ul*Vv{pz5>;QT&X;?{O>6s5Uzp1nHpRDcQ)p!%*&hPS_3>jh&6z zbqUM(0|F~A@jQaQCa5E)(g(YH*QzQiE9iR-l{gx|d%_fnnsP+ifK$EAdcc)v{tPlQ zf$`Q|r9wL={PQ$C^%LRCr5rE~rwce#{LQ|5+@W++)|+vs9gs720sJ@#qwbPKY-Z0CWd2nQT##IEREJP%Nc^!VtZ({)nF;p)TZ zqUlA%s^jy|9Bj|f9=IlAz<-?NagWY`KKn&q2#*{M{BFQP#eV(TutFHpoIx4CVfk5X 
zkbtDu{niKp*!XXse13^g?6r0U<$bdM2ap^FMmvoh3ZZ!X}Cbvxwo78f}Ny- zZ4TaP63CdndC_xbmbuLZvYi`zeyBnT9tOau*xMht#V6lnIRB}y!$^XFL!nC(1!b1^ z(u4kxG#dvwb5vKb%yFGlK$Gl__xns#O!E0Wa%!fzA{EF`o~M5BhGH?eKnfj!bOU)+0hsMAG5!OYy=|7_ck=+cNdmd5{=w#Pws$BD}#rRLw8=B?wDVZzJV2^nb(j_ z0}GBZeQn6uf^$eNq@^(XKFZF&!nIqw%@eG>W{dUi9DVM-VTB41b<{uJ7OI1f*K{*P z)3q;(v+n_CIRjtMdt(Q#dVrIU!4-BCvXieD_xYDkaQ+CVwKocH6mD&ZQlP8Bql)WM z*;aPX>`%n&A?{e{4LXIP%ZdE!8*j!3zoj1Hj+Rm}Oj=uBd)&P9Vtv-&jjF&ASb?!w z63Se&U{7>V zWilU@tfJlaL%jtdv-JLtm+bfBs`Hi)xNkuj;yM~Z^s84nD5<(3M@tg&-GrNX23Vv> z!Ih1LgX}#6PU*G(818M{bgLfwTSOy`fwBOco{6GYe18Dd>W~hf=~>Vw79WY;{WDP~ zum7MxS#mYj#sj0;Wp2fQx}b!^HFfgv_vo`T?Z?Ot96W>|bZ1RtjERHnbOiQ>({E&e z9??9J&I9{K9oo(T)uWGQJDs1uN)!j7)%!i`qa*y=S+}(K-14w#*+kfsWvjzAtBayN zU5{>+n4tUd-8U%xXo>_a^yb`7xNOi>j@7YdmE(I+cof^)G`KYuza@9i*Ftl->79kT zTdmKYAJPgp0TUNu+pNLp=(%x%lPASJ@N4@UX&KnewrW|o@x9aB(lDym=8CgVSPdD4 zTbNfj>|aW_w1;aCYgMI?ueVs&q;fKJgG^T|@wjH&7=12{nn${x6JKz%ebp$A%Wl4W z{UH?f>04FW)E?xO9(ZA&x`0eOC*Y--9rNXCTGX8O!?b^>dvM33Y6%I`Vno~EA!NEF z&9AZ#5m+Hd;<5E*sljGSsE1r09?8bw)C@rDPay#MXQ z*$X4qm%El;#1IJg`(sLgECMp{Wpg9KS3BJrZwfhO?>{)Z+(r1-S~u4zdZ9@RJ;GssCF$8$3k4}|4b(Ec2 zXMDM0lkI@B{P0Qz$yA{x6C$LFL3A|6jFO;O=jp{-##71GCTK`euTw_kE<#~%y$LAR z@VR96v##_nToP-<*5k!}?8EB0J<#>oAelUP_#KRD1J{dy5MI9yM2hjS>;MqnPK-<` zH>yrN8As;*{uQ`R$ z3LgKNG`BL0xY77!7B(GSZeJ>Dio?dHZ9SF`E;WC)vqO=S(k^YxGWYuHeFe$h+AWu) zialBqijcv#`*=b`+UNZ2W(u(4ThripHJ_D{i{)CO2D7T`U%0ePY@5RJY|lQLD>L<5 z?Vd6GWNyet-RtJiLS_rL7{7nfGizAT0ldZ5s0s(l1uts70v||*h799HAH}RK;kyL8 zWxxj#&O_;;H^<^5QsU|1-^$qL#UtoJ0nbdVEUB4!_x1_>pDt$gZ&ouVlc+nZNxFvJ{1RtBXJ}n(!k1f#aJK&S1$XhP7hXQY%U=g7ct-lB+nfE&KMe3fq-jThJYr=LZ*gq?BCh5JFyiQo z@vmJa`!4f_vX3z>jX?{q?sfiwfTp2+V~q)m^}2zzk?b7a|2l(sSfx4{Qn%H?vogZu zsfD^_kxqjCxM0jx??L6FHD*RYp9uxIj4B#OXI-l$a2MGp;N?YVWab~6kuzl-Y2?vYTMI0oM?NY&_2P6yl#bZ>4{ky zn<$z`SbNSE=c$Ha+o;uzZNafyfFYPClTr0-ng3CYw*krmX?1AVGN_a1slEeLaf2b2 z=H5{50^v|a;`LyYU!Uvq#nWB>IGt#VJVgEFX#J{OH}g&!@#03D?9ge)l0!C^K~;1z zrn-JtTJ8~IB%9yUOMlD1xQn#|$J7aM)eSiJt5(Wv(L(hu<*`T*P%-`@<-UK~09E>o 
z{!SYrCQ3ENFM}_bttGP+P&D%BNx*f`$4Q1$13WfP0eQs67*#=Z^Uu+!;ErN1;BX54 zSzf$$f@R47>gZN@=c}v3NbZS_q+5+d{%hiQE@nTD&v12=h+gM|I}ecB0cKp zfWbchF(IQXiq6tUJH{F{&u1}=!2_PXWK8! z3h<0$GOB(KbVW&zIawa?jGSCEd9UPZxLpJ8QfA#x#Nzbwm@%iRLkMLdH=`>D~@IXS51W0VlKiU;e+)c4Oja zp9B~E$k3~61^(>{6Ji5Fie5N#CZ|TP-(S_Wg?{05=WmKZe_wF9cgl{s*Y9LdzjjB9 zTu`LK%F_|A1h@Bp^Zp0AXMGW`F@KF4R~?9g`nPO z+*YpS{}LK5F7*c_Iu-%$CC{58V;eo}&2x&B%L`TAjLK6B1$7YWF{^N@PC3t`AzjMoT{(j`oj@ zVB%?j7wGPcpf@joo-&L=3b!e9$m-N$TY9`_ov#D|YB3O1E$7v6AHh-&?i~L~RUUs4pkZ(;xTNhFhs9omHugj^aoymqMUl0i>riZ;ed! zhjMFvycSm?LyJ9XqaX-;nyV@5pYh}s9lIv1aHB0b64SS2s&Ei zdbK}53OyMEtFFqSza?SBu|$d-ug6Ovm9~y!PO_W6lhWtpdf4qaCa2HhrKc`CCj`F< zT5uhUu^X@uiP+PkvbBCqI}K@4cXLGNg;q-NC0gooL7%=Q0mTo1@)SS1&^n(9H&2wldZ1mW5XfTRAxf$;sQgZntN@%@UQ#Qx8qp@5JFKl-CzR z)$@yK6-K}ab8AS|<_EVOAW24!kC|%8x9!pm*AI}|0QoE(2G2+$%nlEjG z7TX73S9NrHYhg%*8j4HwN_Ud!>n)}kWz2uAbIVh^>^A=&|BtQj0Bb6H;*FJcRoGn% zs1$b%pwgvDcNbfzN>#cG(mR9>7t6{j2pW*y6agXh-a%9_(#3#ukrD_H1Pllv@a6{H z|NFl8&i9E*ZtgwzlsPlMnfXmH`zV_{R5Sa)$tPdGz5h+G{n!nduSZbCu6f=|0g1)a z)Qg-Iw}UC!pvTHe4)$|E`78m0IHiXA1fc~_(s(DKw(sJE9x{SmgM9gL|389z29#Tw znd6UTTU?H8Rfasbw~a>QnVqIl`QB!zLAS~mpGup?h%iQ>B3fuW#Lbdjgw5+00-F1z z3p_qBJ*wBdp9rb$NXJg|eY{SapBIhbZ#j1})#S+0ll%2{zhg2LnoA4_diQp#KY6N_ zg;?}XA-zHoa=S4a(mcqF+BDsDviX&XM&GU>QR6#sG);FTkT7FEW`ip${o)2M zXQ8td;Ec`^%%G2-CBcxtPnL8Kz?rZZHbTHNnfSQm@e{bo=W7{jlyDE0{cph~n_uT! zzgf#9ygJ;PEc*fudn4kR*u>SlBemrw-(ybw8C5MpxW%Xg9cI$r?Sr=p$16o;k=AOP zFEqKO$+?l0Fx?biR>bK;AvU!q@2*To@kbcfe|wu&7H zvV{l1!9MMYR_+k5!7;JCK*dMnoS^WppCxfXQLoR<;1&qRsiNDz;e(_gU`zpmq&NRb zV)+qM+_1uJUj%ZRrXg@+yx&c8=at0fbicWa1QRnWmdyrcjY-x^ihR)6YBDYP<@=MN z{?=YA`?Q3#^RcAX>Ehis@V{{WmaloVmv$4=PwXaW@2>T#5zv3D-BXjyXyf*f!B~7a znq;|6{7fcobg7$=sCUfeMIdbhE1O~T*~1PYBoOqP6pq7aYwTp{#JVjIIv&X#H|`6- zV^Ml13y6b{^<6XoY3>z}J4u@Ss~Zy*;SkB4Jf5@9d~hip3$7^e*epEUl9LTPV+rWJ zL?+kwq)M}^)8pqJ%Zrnhh{=YGZ!fVbY9N?|A_qI)xuW}D{P8;$c4qMxoxd{Gt47*? 
zrSTLz5L$!JH115f*ZLe}8OskS;ULR8Pr$>P-xr5^Y5o5A>-!GCoI7T(7yeVjmo+G* z05zO*j=t%gPZ@{84XT8Qa&b<$l}5!DLTAy)L~t_LRZl_^IvMbMXqO2*INk&(!n5O5 zs}h#P#O-cYIC$3 z4iHRTQ*2eeK5*=4L3IK1_-z*{x3GMDb7ObVFx4^pv2q;|%8~mHM#oU7Paql%4+zde z0E3BuaB<8hv|`BEc+OtNEyQ~H?S<{X55^o1*|~7;=Vz$`zZ^Vm^xF7fhdq%=KK*xT zu71B!X)dSF(-MP&Urs*LYuKTI5xerspQ*pm6xi+#Hq+YIH)1c0iIdDE8MCc*Bl*2c z@s{-+KDn65)QK-qJId?uclSm2%OSPlBWl&i`9CbU+u{=HI^T}dw_ry3t9** z5A3ZTCigx_+;uvpWn%ik+)U3-$w;fQriTNfCKUxHp0NT6@z1Mw)LU16y1h38nUn-R ziHm~QOSwwGd`X?A z(m=<^N@d=m9aH<#cXwO|^V7odXj>svE53!5Wp_^D32-jQUV<7|MsXd3AA<6kcE6%Y zaH-c>fWK#SAR+OIm4Q9mSvHogzW%WNYo* zG6MlCHp)!~Hi-7q@|Pr)mVe{&DY+u0gD?FO2cjv`Y(JL;rn}b z0cQZY7dRx;1Kn5W27F#_Gz*&-Y(Cz>ye^kMC7i%`IiDBn)qi2L$dVu^y4qk$iEc?+ zLB=tWXaaI@p(L{XWcve*XTO4&51=sv=b*UhgIr5r{#Xz$qJfMq3L#QLEQj>?PaNDHm+) zGIX-O&cYv(TKZxu%#d^STv^Mh%tS(O&7^cXHp_r$T!Y!%@8a_+MP=fYcIDa!UG3V@ zLJEYUspIn02_x&1vX@kV^9q@JQ55A~xqb(Nu`UuqL)+hLz%V-#mf-1EckI~V5Zc~t zg&I5L5?@>W-T~gtRxI9m+k>Sm1Z%TRS4a(HAFe+^x_M^+iFjY*ejG0F*&_N)nixXs zI}))Yr?v`6f&ajB{P87G&7zVi?lv!o0aJ{(T3ugV$`_MYFBUN>d0}+=iF1mqi`k_> z=e?YwgZ#MChxOLlnv2U2;>egAVGOS*Cyw*&PT6GcTH{;)#D*kTUH^5|=w{cm3xPO< z<~){ftZHcHd$Up}>v?(kO4Et(vw!P%`NrG9h6E_0+s?fBPsvdB20o}gJ@}GQe{!pU zhK5v14YifoUNc*{J+Z@Lz>+S8M*y|00AEriz^myGdxgoXnV5Td=v#3Iq-6xFaf zwKJWI=P_@x1y<=W-{sgo1t?3%Zg)d~^r6f_?>bb;(yoiD_SkBbKWBIO^5OLm4iInEIdGt7<*hJ z?2SxJ7?;G+obV@u_c({F-;juGxyPlzPipGfs)d4?I{EPMlYDa&1IvZIH>EKXsi*+C z?ae4^TykR;l0$HJjQL5WI8n7lvE2K6U;qTS20%GEZrlO4jHoZ#64ZadcVXqjS(5J+ z@E5xzF!E53)=L{IEJKtkN5dl4AIQ&CV;0xF(VA&eq>&OSR(!d}Uy^>AhTaLKEg5xc zn~*07@7l1l=LPq#!OKh6rrUwK2(7brLRV-JI8C&9njE4cxuc(pv6NV@Z7~1vg*nSW zg5bVk6%p2N;TM=DY8kd1C1_p)%})ax8uHEO6sZe}j1`s8QAH7BvFN)H$(h3_7UV0> zk@etyg(e%pV1)bs%w^7o?Q@7?#`RX1vhQk@6nLP-cSO}eW%VPRh@Q*n7 zqH+K74_If~vYRS_Bh#zl3m=Oxu9IDDLMLtJ=(ZQSrz>sufY)-S8`pk$?yvS-Ume1n zr36=SvxfE}9qdaJnb#jPGs`Tx+K}8WA7xtCfKut1G;hC=)>D!X!ndEJ=nlTJ^0X#U zBHTx`n<|f%Xq_U>;k>vfHjw1;*~vH4KS@Rcr$itSuZ?N8p7{;dCH^2IP=YiNpLnxE zRPBt*L58WsJEnj<=dar}*wqP%k`7=m^rjnR$RzNE{ 
zw4FDyalA+WZq3RPDRXlm*a-NRke^}%DoSy~az6EnuRkJMP$hMM!2L^T2fF>lUnU04 zjxa}5co{T)=5^mIUDst+#s-3v>8-@Nnz1#_Ko8N)rBE@km|WF;n3AO74~M&XAKDYd zq$KttAN>6;n)es~LGsmX-5wU^p^dqgGG6p@E(vmGjqZ2sxwGOTEDAXgm^l^zg7yz> zj(t(se6E`M(`x=0s(#sGAr0HXQ-k;mC$gN>uga|GMS14K9?USTB%0yaEoFbhWBsm{ zm)=E{m{l`^aYF1astDIBq|S!_dT-A0hFH6*U7y0SDZGOjujf%9DvZUc5q#e=0&9bX zWQm0T9J=S)ny?)>cQwK>_*JKKHstL%y-XVcok<_JjB~$Ea44q#bsX)bZM@hf)xm## zzHLOof^vPpWhv``$~Ux}*_7=A|5(THBgm^d8!3@pQZoH3lymlg1-MZu+OP{gLZSM) zo|h;e!R?cWL9Ir=0CyC)r=)9~Xk`Fp__&|+@A+YOADnN(X70BftlvKz#81-94c*Zx zvzs8yJ$P@i2fGvr+>`9NTgXmAHNG#mIIn7nQcVM?joiYb_l>uCd2*#%jx^D>Z#&*k z*SPzJl&FTg;nFiqTI#o7f)^I6KD67d_kIY{28a7*_$s{Jx#m-xER+ii*`T6FYU)Kv zWj{`Gh&Qr^a4py5AJM-^>GedWoB7ml50qRysi zWp^&#%AT?o)^r5=Aax;4?l4B`)%}Dam6H&P2gs5jZWyW=vsC`h0C3pAZ9c6G$BkdY z)o}~{#+r3NClUf9fwZO`Yc6tj@44z}?IIidz25uv`#t;!V>${Qlja0RTPG=2ComUt zqZE;n&l`3VS4A^7r?L+z%`gn;0;Kns=2Ob*5bMorpL2{hzeLdkiGID#)NEnj@eMmm z-(2kVk@Bi?(9w9W*LqS3mooO_6CH}=6OjQ@Bz{~wPYquA1lS+v5?BIR+)Y?>(fmJb+CtCy^4#ZEaG|;V zTgZm3ES2EnlaJVNytH6c`_6d>LjN-Rk{;8Em$-SOm8<*1E-Mi-u=f(S(z4_+7 zwSN!okc=Gn_TKbsk2q!;e7`D&%zq>wf`eXK<&bI`|F`z9is{{+jCUyWFwCcii3 z@G>-5QIg93BbDavomt0r(Krmq21{W=_6u`O1ldSQ@9Gpz%lT<%aIT+$eSNXZ+(=v& zPV49(ujb<12jKGOKe9BR2%{Uu$CB$~Hfr{4{RXVmXLQViqhAQDPXA(AroNbfT|slu zkR_wUJ*kmVguy7Q4s6WLnZ12;kByFW-Whwu3WJKLy{+XK`bIjSUGF_~k7_zmB>nF; z`Pa{dXj$yo=5-QoCHh_3z~*R3%t~HRYT1%UvR7kZv31)lpL%l2{`J4xO_|=U@sc*} z_jZNUTeLPmjvlEjB^L_e54QlCeE;X3|H)Bqe7!r0|EF8IjquIJ2ej%%Zt~CgayC-0?Enam%1e5D- zPSECu9JEM6VzSkO`0{`VsXB7~G)V-PJdzy@HG?vc-9&FsTkn&6%Se-lVD%jS;2mvhBy|>a2t7yK0r(wKC*m^TzoESKt$_7SVtYmTkRmF2RqL* zSZ3Xf%}n=7TrG!_v-6hnqw!T7FZbVr#gPzA5J4Cmrd~m>*HG*jV!SWl9?-7 zf$hp6PEc10!vmCLk$;{l{fBso+I_X=+{|3^I4?RMeQTpau34#u;BvTu0!7i8CFn#8F)>XMbFbC}}5XAO7!*@z}QP{mthYNxpS3|d2cS}eVVt%lfl^i5l4XdC&v{l%>kI1< zsM`p7%*D`m#-1@+?&k?k7b_xWmX3%uhq(rr6N-&~eSRlb-F^1tis@kM@e^O)VRb=H zG27VBY@f%S@juAOwe@;*8%8Kmimkz_^GSdGEz>OKW}`AI=ywVcW~nvtV;%pgpAFB8 zPm%m1Yg9;KSV5wg8jiABQ?!#V}ejVE(Q29HF 
zKR05BcvfJbE5nOGH$GDCf`Zm=iY#=p;AU|~J9WT!QftO5u2L6)(VX&uDK6iAFv;rq z8|0)-33itvgr`u#TvH^Cf8A;OU~N@5&HX%P3W^Z9EzI9SBf&Nxu}scKQ5EgTZ)^~M zjJdZAQP^P@&V28Wt#$~%oW++7%?rD?#qMm^UZi3LIV5NkcP#nT7E265(0ymoGOV(x z6pAd-h=)zrdh!*4kQZq)_3Xac&A}K?p&^~k9L!?1H%DJ%c%F4}e7(4L{x602xc=2W zuSyk5$8L)ACO@(2`Kie~VVrvS=C-E1A7e&xRt*R+Ae5tHaTBMdmRs{1$i*8hy`}(v z3FY%B#3`HpaKU1@9-0gIdJ)91t7-^@GrW-V`r@S@l?`u&>1^oNoKfS*@q6Lj7e6s@ z5U3F{>`GqLNT*I6T#Eb}X0Z;%M@vrb{h(ldcTA;^aPV9)tgVuv?VZ!2>hDvYwouiw z+5*KcL!>NnxyVvI5*@LhSR+9wz+$H*pT=UY!LDub&w@6j^zLuNc;2=r+`SSoTr>GtOqHj!{;38IDC(D=DV%%gK|bvv`EqQt{FJ ztA2zIb_v?u4{AZMN}ab1N%12hrp2DrIgXLax!e}o6q@ZE`B$J@1Ui5xPF<`#P72KZ zc{?766asdR)jvY4bVG|UZC4?lF~xk_xlAIO8bra$sTil!YvC5Fu(D`onU{z*=-Zw5 zxJ$qi;p*4K1zq_O>xHo`s_+zv)0lIYEdiE2bNy$GKxhgOjS!R~Ys5&V#z%~g za+ep;3*jdHEHbZPD=)&S|J3B*+krx)1K6;@i`%lWhyuGjfF0~4as5sp?{KC8Sr=}n!k}8ah6i8M(2Z}DH z!Ky%t)QpPk*H{&eF@#U0@E`DtVg!`X)f7ykUBEd*!G`}dyx=_+kmz!c3kY+L5!U5N zlE%qD1(UNxEXt&t1%O)N4X&qD@z%Jp`eRDF!`TEbcjxV7L88=S_ zTW3S4dPhFSqUh-5#Ub<{dJYEYb(L?0{8@+eVIOy%^Je+8f0j}r*+kAJLmp5a@b&+7 z06N#Hk&Y!lQ-#A%M`w8nb|Ke&-Q6xJ(N=bk|HOQ&h44~wQXY;r_^!e!lKb)ayLDvr zb^zKZfmC0Hvcszwb2_P7#!-04OmxN*Gj*I!ag5%J+t!BvD&8!jzSw54`<8q)R?aro z62P$-k=E@}+RSy5v;^jhPiQ1mmk2=#B^R;Be%6GE7r(_)<^+oIxZ&vW0hIWo3^H}L z8gNeN5iaQ>#)DQ46Te`>Qu5iIRdYYBt$#SII`9ZaE9k>#)^-bfpI7X~{n4o>&%s}k z@)Q83PCXdD{ytl8HvT@#SW6YYy%!|w!rQ(e?a5v5j-S&dBj&v@fWayds0pcS@VwY5 zmMRoEl#ViSy=3v>q1`wKbuO$yDeJ<8tL0w)>m^6Sw-@XS3%l-=>k`(>wlv@VMjTs; z&q$C0EwZmWlV;`_-6modWZ3cSjbVpb^;4w2K53ISfX2&rf7mn+s(f^C?75!S#(dsP z`7tM@E~-A!kJq%^%8wviH(x%Wl_KM0A?(X*mnNsp1VUc z(OUTir_=qTg_Fr6rPg{gzWA6HRi2T!sl)q}u(FHBPzozn%l#4#4lF<~Oe2NGfa(LCgEg%7yPl4OekIa~U?t+{YD`{3 zu~^KTvyL4$IetyaY)T(fo)m|+7gd@M`K!)PtE_(u$9XBt`Oa76Zcz{&(1iWmycuj> z>U?3|R)r)zW4Q8lzvGk^p>&3Cj^_oGDRb*s({%7#+b!eiLY>dbF{^&&jhdQ)J7)Z| zpV;>}nUJgf=00p7T{RaC4GpVf_U|j_nlQ%ETyFkiYf&-Z(UEQ-x~n8)E$ad*6rfq0 zXU$=fvooQVu%0?~9Q`FU@%vw~a#mk9Ca(7H5WhP3czo#<*9!QpB{K@?v|-9sH{gtaA4I!6E#;PPTN0J2-H!AkPc zcnB^?=5>V&wo;j7j>@-Jcq76PBtTeA7&P(3g5m-JG+}OUw;aQBc}6QAk6tM$9zC2E 
zP?UZuwGgqqCr9%T(cD(@7ihFWkmOX-?$738Bd|Jy^~M4XanL{fAoFauvxKgemo{U*zuhG5wR1VD87(};lNLc?#7y=W*it$~A4_-E`F9<*nAnsSEF8Yd- z%8ht-Qa$;7hY|56)3|X%bW+Tj>u6SASVUT+Vl}Pu6wWMWeDcH30h8h0Ct+}{Sy5G< zMYlev`HP~NpGY;F_@(U3}`kcAO+8yz~r1FCqMPCxaq)LYmy=(T<5&E*~M93GEvo-k* zhF|JF%dXgZCg$8#n)YHiwwiyH*0Y-zSzmy-y4MRo?P@4UO6b4ss33T~{)wBmwmTk( z<;|2qb%xeIoIEfm@7lU?Dc7&(0DC=(D@f@hrL2Ql+^a+V%*}r#5_E)Uw-(AQxGZTM zzfiI<2Mm79N)scJ8`wW^yUWBOqwNWL_O)Tz!nFZQ3oc}3qLdVmK_My9hySE2#ibB&-7oVh>dhr%L~rQ@XPn=u!-Rxf7*wUMEaZYbzQ%61JTW z@E42QR2G|1!^uDek_vwxKh7MD@nyB(!+z81=J9*QYQDT?jn@l3HD&U=f3_p!7}qYWtjOlgcf8{8t5?jZ z>O12UA!;1+E*L+++dU*LfryID(Wnc_JYSnfS^_uBWQtc;7T?`;&^_|vAgA^A>(x|_ zq2Cf@)>i8)@kE(JJlkv~T~t1|dDqYV8)-4QCZct z#GGs&0+1J?dxjIn#RwV>;(6#ynh}idPYyH-9S3#YRU=H22;1SJvpyj*GDpK*_5T_w zaQ&|?m)F;R`S(QlH`*_%E1K?q0)`YIO)82G*^wF-J~+D(@w5821FfYhH41^ppm=u^ za@V78wf>ZR<6Y0qIS$IkK(6%1x6d{!3#WcM@Gd8=I5hkbw$FJo(d6qJhX>&jql&K0 zCpOH=ES}C!+>0J$zF29s&S;wzk>7q!*YW673-zHxwT>|tUvT@?5n&Ti?5?o3a|A?4 zey8Kh{kU)u`JJ6Q-o7~!d4Wl^{Cyc< z5LY!qCjaWUPa>GPi4$mCdOpF?>B906T;8#m^KjLZ1ZW6m+YYn8#Dh1D19z?rvtjX=lqjQ~ITNVD7l7}`fwEDOBgP_Zq(h{_qDg@ zgpl`>_uVr^FPj}VlBbj2C+YMhOiGZ|N~eCp5!6Z{^urH^^Y^dF3Wrr{$BO$2B zsUZKzI~?KGhT0lqHvv15=kDiM4bjYQo-6SR+fEeQzsgSLC&p;y>#^+yrw+W{rh4Tb zVNAnYumJ^{{^Y<|c&O`w;5<+876fo73#ZIE^A48Ffk(w&ptIk9kvJ}bQ%)Y|!LG0%&hxK*cnEl_yIe*HN(BOw{+beMepf5(`5k*NIM6ECSvpLI0F z9SrkZTjL~){pi$ZY2p_< z0Dpqje5Jw!#1B_|I)9+qa1(!HjV<&Fvhh_j#Cln40C*xkU-S8#e&Nz(f?|=oo!Qbb z1)}u-su&gYp6G9J7-`Wy>zdi>A2&bI958nx^nH0&PuiDE7e%RJ@A#{(&+d;#JJ4Fh zM46@ptChg=j@yPaWdW_96Xs8&tQ?M-Og9bsa{!WcRgnIE-@kztofihm5rG2Oi!LlI z#+HR+ivJ4uyIqG2iVFGTcuz@fnJh3$qB8uIzW+*Wij{_{Ao1hRu`NbCFiKU&q4LRY zUjM&ogO%48tBJ+q-f#Yi|JQ5S*9O2|v>i}~Lk9F>@Zk{wwN$C!juA-qk6hR9`W+qr zve$ot8_VL4DnXJ87IHl2pRp>Bqu;bmc86^qMVNhT)Ow^gkQ}yD9<$@3IdB&Bn*X>5>6-AU{hN2 zn$^?HOhL3@%F?7PuuUy%pFz|u(KzZOPt^FEdAtYLfMZCC$<#Uhr0moawOX=$XHQCa9&qCnm4dP?!Z%!fhq4qm4iwf!GY!_*Mfi2TAQn&E-vgF<;V4 zT=b!1nU#k{j6BT=D{Tp~^A2dlkx+ikBE0BCf0k}0uj}hyFXbF(Fm-pAB= 
zdJ{6G7#!CsTt`FTK{dWhlWh*m4%!&W$S_Lp-q`UVI?;T>Wn+%$+gJx=J~tbEtol?T za@>?hb{Bvv!5RKDk|UlJH@0?j*+4bB6x3xg$zM)430R8ozE=HoJ~e`SQ(Kl3Sg$is zeM?-1?qIu&CS{*|KZ7Q#RIwDVq^ETv+@Q9T)o{uw!gT>5DTBV`VTJZY=_RKQ zd0#y0GWVhHZSiL1Tm+IkP)5^+@x`auZipgNB;Ci=Nqt){`uh)%_H(Ry1y}B_fPaBB zjm!HHNp)`E+R_w382Qxm%+j$dkHM|uxf8+ImTBTdx&LmB!iGi;62`}Hb=_MoB#czY zvw^&i)3XkB0EYi(PSqCx7zr9kM$iVzTqO{(gHTgQt4zkaXuGS~Q{17f{XJH~K#3gd z9RDLnYq9Z}D(l_AKkZaDbV}F8(a^Gv&IJ=;tN1W5RDvr4!A4|a3}IpIzrC|HXa7kf5Uy zsyV54x1Mxg&;XB2SW&s4X?k+CAvl**E2x%aC8EB{N&=>&wI})J2 z_z`s3%85;VQkp_bR~F9qkn#pgJ^H%estqD@|H)9XiX%C@1_e=VL`3%>LF)k?k-IoBndOT;&e7`^y> zV~H!t#chfE8T0fHPta~xdQmltg`U{kt;jL*hLL<`PZ%9#0?SHK0VpPzG%<>f=PJtg zI*xs~K^ZN;8e&%`KYgoiI>iTFNA6wpJqzlWMJOcl3W4j)=oXqVaxguL@YnU=8z}A~Iec<@_b(`4&(K_3gla}so#3<6g9qK^d-v$s0)X@DOlxSe@1NH3b zi3JCgXCs(2O~Yo6bL>)j7)7W#Lz*W5O=;N}O7v-q$kLrg;SrYfe}D#m?)km71D9Y6 zv6=uea<>cAJ1lv7ob`XlJ_@o-zCOc{i8>lMTZ$~cmsV)|81#wipO>=Ot$@gtcEwgI z%+*hSt6b{g&?ByXePZL{_G?4A3L{Ei*FQr~%y_-qP8Ot#OPT~Yxc5Ye`XHb87)H<} zw^`iIHorD}6nwX@dTnV|fyXehgL&#_#2}b01-PSkTlz(=<3SgI0vg%^eNtMhAiQKg zp|jrU=*Oe&{VThPU&|rMx`ml}seoKr-VG{#Z?aG03uQ5kfH121q`{kPX*-!M1jQ35 zJ?wGP^8Th)96&4~g1*z=F6;pPM?jt>Ck!Bme{0;~=YiG?EGN6=Ab5-A(9&PgR;EIz z^9Lk@mmuHbJOAO0+48}{fmTyl0n)5p@yhHpHSWEuD}L?saH|>FG``HP*~u%GZ2R^D z?Fa|mdRa5w9zlQg(OVsHHpEYg)W!|<8pj(T!)^&Y8PZ3)jgt$l4yOEuIbaNO-t-?( z6@l<}QVT?Va5JPuW}!GvIV9=g(l+Ne-}~GHrBXl{eai-0#or-;8iiR|ZR%XUHiVf$ z%+$s2wwe6!U=sVa>eVrajgdnmeD*0T9Z1J#){k7|;q){$Z#aNjI>JLORbF@&G5qa* zz}Kdyk=r|e-%aH;-tjZW;@i=-BozFk8$cOjl;Cm{E2geljf0+)AsOTUK?&s~~U19y;Wq)C+3&F@EHywzC7!y$9UC8WjwP|}VjdvdnE zF6byFwcy493+`;05g<1ZxMa(0MY%|Vn}xL&fm9R=rp2qs8X}q%(Quc=T*!4lQ&3I> zCr5&er{4s{wgv$J8wl2s1)d*l@jWg^31n0#e*CP#`p~k>njC)$LHa|Ibv+SwOe}^P z>Q6B8)lLKV!Z*u@JsnaJ+cEbRL$R0A4F)|d(NPTE^TNavzMYy2X@NG8gOB^SOQ@lu zfz{$&Gzo0?xR(AwV`?sfbm+X59B<_X>NOfSMt-)V;p~DL+r{iIdf&stC@b_r)lo$> zBbF;1QM2ELk_SYv{PbK|D}r)oNfAKIyme=cIBqTlvb;sk7iNI{SCY8|EHsOBmZs79 zv4;y>_lsYG61a$~2zG;fkGCxeMSQ)LK!9FQ%P73+mKXd+4N=TS8HI#l=X$<>BC16EZsTb8uOqJaDuC 
zsm8P4n~=IwICBDQ&yOr`gJ(Q_)y~huim)Rze7mM$+t752G*p-CM}yW`g)6N4OnZIT zP02&kp7KJ!@4UUO@5IaFH-3F$`iq{-RSVbMTwOP~ZSGxlyK(WVC)YMZJu6JorCsMQ z47whC&A;v9`LY7+-K5Khqi3;6!^Fu5x7K47DRMYZi*c38|*dd zHa!L6ObEIFa>DwaqK<@_Ci>7LHpF0v)BeN=G0%%i83(M*uQToFJBU16-hex3qL}M=Sy*2NEaYqI z%?4sbE#gM1e0kN%qg$|JCRmwdgM;-3Bl@ONt}JUGeTYF*9#vbQRWY@y#IL0hvBXq# zksto-+iw21b;cTvdWu}!%c42JH0<_rZ7p-#+X>H6AN!f3nFl!ebD}q|4lP*osnp2l z`?-}`qJ&TlMJ%t&)+=BjbTgT3D^bvV^Y^_N=i10)hZwWD8$pe;cI0J!ZI(J*5XAFi zfRO)dghorKmzo__ve&Yxx2X#bDEfZwR}jD$i=~eSOe?yBDjVwvfCpKauFg<9h#Y#9 z{fWDRA5tDrjfn$#^~yJv^~w6=lw2=BtYHlBRG4D)4(uJ#5(B6Rb(kokR%dJTd|s{A zL&#G#$gD!HK3r_EihkNKxzI#SCog4(?EAq;feEIMxezn;IO4@u_LGquEhGAQjN^G( zyutZREdR(wwWF+Jf4O|0q3V3J$18B(Vgk5FQ%Hbf3tc|Mr=Yt7%^E{|&W&}=4+})NNlAIy^Q*hoaGP&Hc-Wrnj7#}LQ4ue&yUv%-fx#zWk`>r_U|b! zCi1ZbpYrmgx9vqDMy|JU#InSgMf$_7@zcHGrOq6xubI}mw8s9KW-9AgJKnE7ltG>2 z%b6R9IBFQ+`Ei5(IW(ert)J*^g8vlD1P;}qvWZzD^|OPRqHj=MT%5K^{amqOkZZ@| zS<9JOgZw)?8b*(m2W7ypTi7D{OjY^;GLnykAC+ACK$ReeW*bV?4ITmb@n3XeVnw}k zl_mdk@?$dI=TY$?5-Uh{x1ro#Ot*t}T2V!_UP^*E;}tnDyPh~4^#;;;Va?H91{)W3 ztJyycnglYH>Zcd`;!ZHu9r7yu1`fs0hK=x(ro67J?MVuN5Pw-m@{-uBSAv{3ae~Pe zD|IX1GFoDa^Rv}ee92uNOG`sz;WnneYtxw{u2{y$GYSgkNXg(yOu_Xq1?NR&2}}X+ zVCRb{N=LS#!N?iw$=B2gvbSD@X|WZo<@36Xd!QdWp2;+*zaD6VH=s|0Sp41!aC13z znetx9)NzcXEs$L9lqE@Kvq{S%iw}wD&Vwy+6J5@Jy|?1?2U0wnha`(HEl(R$%J~Kh z=@m@=^uG?)$h=CZNnwzjs8#Db+V zf5X2K=+#(wLNp$H?gfp?CQ2oUpEX^bJlE8^{w7`k)|r5OW4R;flW@HI6F^u~9*ie;8j zpOYW%XgD}vu2fJJNN?OzES6Vyt@f$B-xCj(C=$L2lWp7!ypjz~QnU)xe^y{-YRW99 z{z>*fcIewWh5G3cY}a7pMRYDSB5d?~rTwnL0^4f$5KsDDigwOMQM{dA#+T0^AR9`m z0!GWCn)NcJPOtS9Q@2n7cBjF{oSs|TF=rKF0EGiMFgf1y`E`|kwsf%_Hqe)*FIx|A z2>DOaV?w6w$X$CQRAeEYHe*&Xj-J_4y|@1369>{#zjT(0|UYB|a16 zQEU^Btgj0v=HpN7dX#;TmkAu2UQ^K7h8ukbOk4b5=b!K&iyd)UlOgP^Aqbse5r5lq3x zP?nzyU5MQB{T$RtiM!6CJSzVOz%B3JQ4!E)#z z22V>n-&l3py0yGLoLH#Xg%-k^;vB?$HX~9bhCeW<^cmBQ8=5LB9}s{RS-S|5}cXp*}?`*4LT7O4ZKRHB|TolQ4Yem`KoF=mz-_?qbzp(CC>10`kp6N1?BIIM)5i;_yP0=!2;ykQ#0|4Lv6I5FL1+r z5>hr9%B{-juYEnplRcovnRDSJdY#3UvBS|Lb(5zs7#XzKPlL5r2lYZ|nNrT01$~Ju 
zX2{ly;td-AuynfyL_pS%+1NO?_ZZ^7^&I%=YU@FsA-WKQd>Uu#vo=iet@a_?gQ@?3 zpcmAL_4mRD;qCA-tH=!K1Zx zen#|9R7=)|Wj!qzdFU8)p8_#YL^H#rav3*FYIf~`WTdr`DM5Zi+>(7~IyKykly`UjJN9|D0nVcoOZTi?b|rT5!dYTB`+h~;7GhC?iz;WDhWy@IteJ?~@x zyRD;p$_v^R<`ufa0qgR-Aa2aJ2b+dWJ^JmZcUZ4QL=nh_$Edb&#Xp-1LfSu8uvQWkkuUCg^I`sl7WaPvyBJ3=Lqn*i{p~l&V)80EA=||?6mSu= zIUYrox$wcyV;MWhgTMTV!SvkXFyzQHyE4bK1lpM3u_p{gpZy@48}6|TI)h%_W3^WT zjp&m98*_|+|22F*wVHTo-?3eQKR^pUg+)BovF5D>R);6h|K$mjdzVWZ7gT|9y|z*5=ZIIsum0v?oSmuflINBC zrzoqh7@4{qWRtWvIxbS=H%hXzbwM+;v*VQuDb)@$w{|ef^yVPztH)`;I{L}Y%?%}E zcJ7bmH#cw98hg#x1@+Ci%&8P0E$f4Wt;4ml+^fh&^a}sB25Z*)cd{GQ8!>(91LX4X zxW22VWzkPz@t$A>eh{^P5bnY>D5^hM)>EqTQ^lLoTd87(z&6cl-4^EfuWic7{hyQj zMDXow!vSlpzeab@){WlFjWOGdi_9Lzzg%CxI#AXjBG|znW9o;@>i3%0j>T8Kt#{5s0FSiAIRpj|N89f%m1D17zfIT$RM9_pzYh^TOD>pP9|hj*w8GL zXfL&^ZM~M?DsMg@=AE_gID{Yt)L(57${Rs$|G`@mD# zzNBkBs9SoorDrwngfOk|Of^2p%i62XzNNKiwNJ#D5)*mT%eth#FRt(AyhBNS+^V(p zC5GuOV9Gs!ZuJVhvU#8(J&fuC2QIAtt?@eR&Z$4Z$*Yp<=PNCH4NW+NX7NgalIX#! zA}P7_^{GeBZMOE-)>-A1K@W3T&qq&WBqfW}*Y10CD6Pb)+FZYpsc)RQFhBcigwJ_> zWW5<#f2XUhgzqS@Gw5k;ZN*7SN~-wKDk;idQ^G=aK|vcb`u-~jEwHVUl3a~UDGJ$H zTTkY!uCD54F8DijBrEl*(A*-9$v4?oSNV}|vWe2SluYWpeO$7xQo5fzo`4%;KbT*(< zOJ6Q2e*Set#WVVlO!td(X*P;uM^sEql^qkG;8`Q=>`e3KU4470E(peYnrGgpxo>G{ zNgMU!ZnoTkEO;@FBpd!Dk9I^(EIuhI@bxRcjW2c#GopOnTE1$?H8$&PC-O@wAFCKB z^uIxbGuH6`eIeGnp+9oMSE={OcI5j9tmg+`T$#Q1{cyxIq_Px>Bj^OdZ62^di4d<| zsx(ebCsk0Pc6CMOEiyoO@RqAL%zAUJbm4OUO3LYyLHL2YC;=n_Cu~O~DVT zUgUE9fy2C@Y@qQE;5Ar`VsfL@yKM=POX&YoEU0}o+a zp5I&qaeXhm=xJLUPhxI3k33s}A1p44$L-mo){#Z-FrGU*K?|}-tE%%OH#!kga+R;u z@`pxj4Tt0T(2=q0H5hnixs*N3z+J>%?S z*~H!LC0CuCzHW}MqF*vMu6=gZPO8PDnOaCjj>^A1cy>Z{p7}9ap+oWOm?~S}C)&`I z&9{d%3B6yS!x{b43o_{zxVmT#(BqLl~K`c9o?OV+^!k zB%22Mo;l1ePB23EK47nSiIPG>EL)P1JX~=)K zrUXx&N#Gyol@Bha4{iv@%nxKoF4|O@EQKm>e7?Jyl0(z4t@mrlMo`jgi=)gN^kslO zjRjkIP3NxD&lhM7(!9!N)>_19p~Y+=;YH4~Ym3r6{57-w!k`3M=`e4K>9Sl z1M1eG)aeHiMGn~SsK$J6sgL@YjO3abIGKN4UjQ{kP}XQ*p-5U20n@VuHCuw)sJ`SA z*|Pfi$7BFu4w-RYIh!`JmTS1u6@e7-C}_)nX3kbEs|U{Y4yQKLyM 
zKo;&++UVummP}0`FO=)vRbpb5W%V-kR)~3t;tUt!ywg`nYK49~3|qrjAA_`!6Dg*( zsPF7pzbP{Buhz_4?lHwR)5zEy?Kbyar;oT(!sYuX-1JlGr>LTgulo8)p47r2x^gCa zqAm06d}4=`tgJb?wtnHO7q^15QhD2SjbV+$)KB_z{eh+{n?2S0c`Qx8HC_PVe}1B{ zU16md1S^p`%%NP%qYx-&7&uLEsQxx)sE2PvoV>kO7{fuI&d)TL_6GQ(=Z-k3V%KDZ z{A(|WazM|9eGYkR^ouiRYRbB#i$Vka)9_qD1#88np{fM5?c zrf7NBd92jU-;p03&zJigXtKJh?e(Q$J zZLOjr&|VR$fS@SI5U{mM5d~$QK~azyWD-I+SgTx%Km`SqNkok%kRbCsiiivm@qj=e zDo8NQLlOuidDjV8`+o25+kep5oSd`wK6|g>S z?Z?I5Lx5rjWzcbeS`m4zoaT^(~b9{agQ07#L-pKm`=(TzAZ$ z@2=L~!!Ug`lAKy;62fHr7yP|JmO*u@)b(O*kPCUu&Cbb{T`#@;Jkx;Lhpm3xn79&? zgFj>DmGA%@rFkeWzR}sTx|lCuXS%s(+T-u@rR}^igi&mgAzf@!kX|m^#J-nCNE8Bq zO)px)jza4<6gFFGt7^*Nx;*_k!p77}dV`>b{m0G`(??M?r^*?I)>#hLx>zSGy_NMv z4EC4kt+*F%hL!AVuD7aK_^FTK>H@)JWT_@7cR$CFMHxP0E=xODj1%MhFtD%=$yI&dnt&Iwh(!Vtm^$CTWRF+7i4T zYZ=K@vH1i14^7N82^jO0EyVD$y6I3G^+z>16~S$DnrIM(XG5Z$2FLTDkAv8_ro0^* z2tM;Z%*J0{91F`2&nOH?8@}%4kyCI*Zlimcb!GBx=)is#q8Ff;ntt4csttv;uOIzy zQZ8mM7<+Zk=G#%8wTs_;L8dtAsvhITjAI^=P zBk{CMm+C5h0!$6~`lmrW-cqU^Sumt9X^6a4d+tiK2vC4hk2WoAU7Pgpb?=|VmQS5I z)i?MA+k&FvS0cE2bnI98?LT7hQBlY!^gRDql=#2UwU0@1&ObFIvun@}wie8TmTtIj zRCp&Y)!b(P?#m7@s<&m}s*82~0T-1OjEDPklFyOg6(FwFz4n4b5cI2$i}JwiLMZF; zJkG&2YBv-+bZ5VBH7pDY9RauWjyG+f#G6^%Pcc9~)XL9&z&ERU2ljY}5*l?g=`G#;vRSA;Lc-GQ>Kh z^Uq$EH7|EC1vAQ`J%g9nUFc(U>XWDHx_f*p1VMkDmu318XM$!dLpeDdyb4 zyD;oHk4@C|J)4ov{uC?5+!b%bFvbA__25WdNRR*0Q&tDFHl1kYRLC;XV~vb>?5TM_ zy0(%<>y5&~AeHC*%7Mg{w8bVED|P8A;G1)X*)yn%I9QM@R%daeX%w4~uJ_nraB1}% zbWS>mJAU+>giHUc2Y5a5TC_m@;D20^-4j&dxrVm*qY?*uhI_`doQZCnOY9SIE*If< z(HvGH`zLgO;;qz{X+_NK`xJY_Y2^n>v?VzFFRC%k&^Dn$v{C)`ZF~W}7S5lHub6zb zc$*xP$tg2QfcVJT)|NS{V9H?e9x+R21v_}OUY|SH+G~TRAO_XmV+_V}n_!ZAw{K3_nwd#4mwXF>r}42~)qErcq8Mj9DJ^(Zu&bJD(JST_TOQw7-%k-ZoZmj`Xn^BSVt=bH@63%LSTT1JtPxv$4xi=a5aPO06myJj zA{{h2&VIxeq907!+V|*vhwJj8@)y8YZ*LGRqQadc^0ork5NFOqwnXbOcut*t_^%;p zSDJ$kuM-<9IK-Cq|MOVP%ZrX1jiHE~1YP2&15T?f%!*`5vbIs(8%Dpflwf6OC-?y2 zNmpHbZUr%zo}53xhpv0gf5QHg{Y%^ng4Bm7)PgKy zBkg?I5pIxsUD<}|A|P_oKjB;7oTdX;;`8w2Y(RXS@vsDE2(~vzeDKd$y9H&A0_HL% 
zX`*Qtr;rALfT99p$iQHk|AYnj|Z2Reb}HSz{EJT=~K_?7^zc zUNkbNpxf{h&{_9wXo8r8;OeRO%ttoN5s>iU2=kDb9m|Z%sCQb`I#4`mH!(5Moz%-V zA}3KuRteiy^AAd2NusTkFw3b-jU{aqjWZ2c8@*#@Jvsx@KPDwrFPlHeZl;iO#pq`; z*A1$udf3?P*;`tz>viJ$l|_`Bhj%N9bNCb0C!1WO8YU(j7FsskIwd6uGVe!UoVsr!xY6&yGbM6Uh> zPS_d~p851Y5ywQ5o0?75Z-%vZyflk8S&5wQ*QD^Qc(t3B7SHE!-nh67grB>&2B)F6 z66JbUb{bQ3u-?c{sF}jZcyHWaQGWfeVuR!@+EvG8anH(w8|I9x)>F#aYd5;r)<%ps zF#JbYfN9owT?`6I11iUzPb)O{{(xJFT=i-{n(#yMk756%8uXJFk9u!lFsjL6R_eJ? z<(c@vJWF^7ufATYZyyxe_`-D>! zPMinXhc!}%wyA~kG~?nA^S+Udxs+~6y#H}YBzZ70mbn1SFm*VAVnSAjAWj6J+{$DU zV#T6e*aI_M!yemKO?tBMM@^Hw6TUd&SB?rL9n1ySLVod;{`<_%n&QsIuSBVHiQ?&q zjyvs=<2_Z%@aezL`wco+*>ohPTm1{%o9$6W5#AgEDz_OQX2~f4RHTq>lJbUrMVk;Q zI_eF^!=Vt!4vzEFgA@)|B>nQ?^|oIYqPq^qxDLG03C67pntiz@`WGi{jPK^!xX8KE zujrv&5x^NE)7AHuKBzbmvN)TwM&kk1dv?sWFmBJRUTt6PQ^sMkj5W)OMm4DX9xJ%2 zb$bBfNP(QUM`gMOl|O({1Fh~^eyQ$=yMnqiwpVB+Wur*ID;==Ijwh5_d02@^5${u3 z3RX@h1+E;Um^aN=z*`SP3D&!R#NlN7!A}T6m$Pa)=`!izS%o#>&kyA%M)3~N^aVQ& zMGfl*x$^|hvUcSB)!W%?C0*fM@xt2W`AP-B2bDczBH@sTFovI64AK53V?*lrfM~u$ zC5wa1D~pyeXt|XVBPsVsePmUi^}xc8@I4ZWK1UrWS&xKgM}Mqybv7sRUj76k>v6DY zj)cwBNvHCs3=k0HSc*d4U=bhetY6Li=kM?)l-h_E9AAmj!x`g}FyI@N7MwVV1kp~` z{S{du+dNrQQRXr8E3mU7i<^)mF@$x-ASh2#A7xtNDG}$2gP_)xC1Oi7;v7afv6-Ta zA$QZd&(Z*_09LVUWBKAQ@0WX}FTVZQJpN|!X8o(WuPf)AChgH=U!!8gkDIz-$ig$K zwX@xCE0K~Fx};|?c|XZdGvL2(1Ja42^}@!NTdiB_+4r!;x_A_c{coEpgDx!_v&QL$ zNWxXqLJ7R@++;(CvJ)4$4T6to@kVzu${q*Pp8-3&q$Emz)BWh=S;y&fsZ7%Re>lJU{dP$|%q(y<%gfWE8-l&DW_ai7*4FA<8Wi zapxd^vzgYS$ij&wGTo9_)mzegWvTxh5g4B*e8+|XHncsp-fZcPMa0vv$gY<op zh+$H=vI7QA)uS3CLMs0vrxIO4Y&0=g5PE<5P2uQJlLWvX(N8J*x#jDIXRS@o51h%3*4wEz7G(3KOpR#r~@VJ zM5-Y6%A%niX1C?lk|&)rEQ#kK>MyIdPvB3E5o^C_&%g_ucUPKevMU8>adm$*NwiOG z`Tz8z%g~bfa#XiHP~J{34}onEJqi1FFMw^oag}%hb88}FCCcG**>Z?Un{Qr+SMa;$ zRe7OrS`FN*}hKbwpLM?CC_)>ey~!x%|Nxs zeq|&S|CwLsFwY_xo22N&`E;S4#n7*M?$V+)X`{52>$Rw@oMF@K# zIbhQGlZAc_8vzz%Ki%H*$HjJB%l4BZE6xJ?6F2_ZSapz&49X0TRGJXIul(yQ;B9!kW3BUYj)VS zO?&jS`Oh&km%sgyeD(B2)}Oup&Se=+f@v=>X>f1gAfJkR6-ZL-$(3nMc7y3CwJAK& 
zd*-fYVTka^l%6N|D??Gm7_~o?GpRnF%YVb_(DP*NZQhR9nr60Ag>5r7Q^JOhwLKGk zq4gevr3+JUtt-8FtgsIHfuK$qbw;T__l_@wVQ)=% zesZOaO>uCb89GFfu{JE7g|%XvfbC;(%Z=-?G^0iNH!uX7C#Ni(3Z62O+1}eWEdwJg zd;HGyvJ54b+$y_Cxcx;AnVD;Fvbw0kbthJLAy+f&LJ-K$fcaEki!jE2AYOCup|0Dz(2)i~HKp>v z?kOEdLyPKCe%dXmuW)sIF||P43u1U{gWxfWnzRj&_^u)6KxN2cURKw~aEa21jU)0e zM~CMLHnmk^&SrNx%aZOpU%}|HQjq!w3!i6CzRb_f6r4&kMMlCT~lryVXP^{^~;svf1=bE%rRgj-w)QEi8 z@>R;@svij_ZXI0{?OE#9R`OWa4~pzt>pkuaui{Zf^udf~yQrQJqz1`gPX_A_d{mXw z+OS3I7w-Os_GqbZ@Gl5e!q*st z6L4XR4MUUq)pxk)-jZ^DCYuEUwSFqhn$wILB9Ek~^;7F{bvN5i=!t&`f0T!30n2mI zRy8V9aoDz5b(|t>1^CQWtGXz{r&0Nig58N=;K2QMLuD&f>^ox5CwB#Py$#Vq67N=Q>i|C8pm+bKbiG2grdOL_r*?J&;XE zEpof(moIQ_`_-30nqV!}SKE?KadvUqcbZ!)8M|5NQ+#-hM&Y0%{(){GhTRsDjjUX3 z@Mk%0*bXh9Kn?RI9f-aM36VZs^p3QBS*yO4GDiUcmeX7kZ8YWk__Z2s|( z-anfG%&kUEtd+LG81KA!HT`9UjWsRx!b+_XS3$XNTd{?hl-Y8cL3~xVghgdbq-}t6 z#>jupYC<_G^Ka59J(fHtB)fSn&Q(qvp2d1`NK*TIK?15msfEx*75f=`24Oq*?+7O+~I$h#+R4)wp-wE>D$HD zOjtE3!ZgH>V@5X2l~SMEJ7rZ()jf9`Nmz@k({(EZqfgw(Qr7C^OuOpHhVjYHz--+j z%*+)eXM7WFSDOEn=1U&5hqxK<&*sIC@TSKoPAXU$n``f8a*oL||28V34t5S_wXP_F z!lAdR-u){R@vdlZ&$9sx(L57F*0y9?4OB*0b4n34LB+ENjwb~#UO>%E*XlR=6RSPJ zr3ZsvDXIu;V1!xMS`oAP&)BorRE+Cb6Ged?nJxph6-FU*-v^}~g*ORQWc@tdkYM46 z=PI<*{rb}09&GH_LH-mAFoI@u(!dE+<>;6Y&DtO7^nQSogP=>4lw3zJ3m$s<^{sRp z(B_X-1@AU~jrcV%WU?Mz1W~ecq{PxC18H368cuyQXkDG7Tk(|9V?zM7(M-dzf#?TS z4AviO{`N+PUGe^sFb|eb@szf4$EMNL6 z8v)uDE_X+R z@M0F*0>|eE*`#f?zc6JoEQt3ae&O6*B_xB}{BI#(f~Uh4n~ZNIvmiQFQ_asolxn%t zuh}6e6|hx+$Nn|{ZMn5_dB&j$@p@zm*jo4a3m1M}lmCub5aGs|W*;_boA6p7eq1XV zI~s*>a(} zDdGiSY@y~cH8D$Ey0$Hc1Cmd&YVKA7HBb2k0un2& zi#y4d*|vGXvE(UZ7fK+Pvv;7PQq1l9g$9$<;lEJLTJ{?Iv)32RYi;||3b;EW6ehva z{o+18PeMY1Kw)Nv{%tkA{&M}>Ciu4vHXHV~w#dbCSB-_oSFT)9xE-2&YdwzjqlrEsf4E}mCOHd0kp zU2Z74oLe+TtF2*6lQP!2KNns-hi6$LTAxKVQGiE>+1zWpO-E4=@$puIzTs-_zf_N(qn-{l|P>QF;~F87fPIFD&*Fvb!OdgdZK z&y2{n5Je!^`#mD#R8$)=`At;EIv4$LZef<-DqF&$t>8T@l(~ELuG73iZg}Ow``9o# zogT)1OcH+lQpQok;+%<$Q*h@Fm{WY#>`hYJYnIROyyyDpt#bvCh_Kea;^4eI6nY5D 
z{RJ>{QZb^5w3t5Qv&z4YuMecSb~Vs?jhT-gW^W$sKeOwDcK)K-&2) zwY;4)Mn;(VKfJ4eh?32KrJx;)^SD=6@>9@8hYDAKqN}c^d1UFl+w1ELD@V2YSI)H! znsoBuVE#9=6zf%%V!%CaLV1bXifl$UGnEJI+s$@-1>VJAB?b0nUQIFoGP{HwMu{=q z3xIiRe)vCv>Fq%v_tQETTd{Z>p1jRc`}oDSr2~=K9jxRGspvQ_Z%!h6nVkXIGtK}a zd2{ABF1Gs(AQnr075v`@Lr$aC4GFS7}wB9`p2!vzadCq z|KT--@egWG4VdH0D|{d`LxZrHoQ3b`oulNTjg(s3VgTssj&jr4*{CKZh~n0@dRnjS ztM`Z`OUD0qhFM&T9XA1Qoe)X?5#1AL0kcnT>Vbu>0l6gJ}N3IfcueG#xd|<^4?z4f5v!-rB&n? z1%9|1JC%d=0r#(&wbfCO&oh=ku${58vz!z+#ye)fXyG;i3;r zPhS+gmMUATIg=Izs>x5GU<27na{<>8kbLA@i2yX@QHU2sYR>eX6n>^@r^=~GFI@WRx zr=z5;e+0$p=2-r&Kw=?UFZ>uM+B3#=#3w_)VW0jz{w2?t=&jRtJ7<>2jj(hzhmj3A zP@zdW)BRgqnFK4o5{svB8$NnPT`NC< z(rpmiYDDtF8XRS)9#{v6v!HU92Q0+sxHFEOtz36;i1te#(0r%mb0Hgrd0^t(wsd1! z+S%}$CkQ_t>(cBiOl{0-cES9Z94uJ)q$M z!aA&*VFi|+W~lal)yc(0wh>s8KoWI5;Ar7wo}r~%t5Qj(Q3vhhoWv0`jaI4at*v=$BOfuiR$>3dmbO%drEHJP(7AR3Ne_n6Ta-&hMBgtkY^%8hqvk7YbrNI)+a`wrebEv{X+n911yW_Yvyn z8rft~sCk(uiS8AFSih!|P;ZK|n0J&C2VlV20G|egCd%oYDPlLS#rPcr~QjkYjq!6b3r1Tk8#Oz<3-L+ z_8ATX;3BOhE941FBD_NaI61KL>Wn=YSLcSb9zr(G^Jkl6CtcSyBDf z=;+r5V3EW^EN~V7i|(CftqRJ<3xE38#FkrZ+q}jpQ)d5~fcJwg)!BW0ude8e^7*k> z({4n1a2}%A`pbRdVI}<-xFtT=MnAVucmBj|I~rs(CQGTcYsUb~fW&P)sFN+*1u0Kw zhyIdm-t;3FJP{g?D&D4zWeJ975&6*sXGede@z`$=o&5fjg23fJSvKWdV>7&%-v=d7 z2kqN!L>2rAcxv4@6$SjqL#az=o`6E{f(^k8aC>>V6{|3sTPL+9HV!{`&r4K8zXtwNr0jqP0x z8zT6lJrDymsJ-rp^YQWbTEMeTp+>;UvT2-hYbLX-YOx>;E_%~!6?+lY0p!a;&ewK& zEiSz(stg>W|FOGQgn~HVq3-O^WCH9yIDfLPU|^|+<;bfriJG{j%ooxl`+{o6HYy0( zE?e*G3YXU5k(odkMQHvxO{(>Pgal|Y$R0?Oi=bs6yJLpf1$~}FF4orj$ zbHTvq!dMP(O?2fL9^UI>&d@_P2#>G?_aDGFuHy&hR`@lvA&+a2;?th2q;1IyfY*=` z5-9<6g`6<`cpOo;8Ks%r4MwU>u5ybW0)oP>zz0@!xH?@^WiD zgQJ{Q*arZ=3~8k+l!Zh%Qu8w9Qfl?VXHDigGSQ zG2OdJ6d8qP27X&IH*$uUJo`WP?~M%=i(+A!8%?({qt?(OhOotf-Oo$5^c^C0koEfa zj`Wp%0cH}s!i}G|scpP0+2(q;x#?qh9bM6zKYL@W9VpV)Mk%+z)@7e8m1z}xfRMB` zzMAh?nj(N-cg=CZVA8hH`zlu^MoNC7uI6x8{FBM;45X}{;o7&i`^t_OKhY_E=Z-lc zq5LxVpZ#?I0pz}gDd)-Jz4G#MPar0CF_c8@@!Ji8$4yv93ESra%P;F8Y6_D#Evoe^ 
z*wenHA^|e~s8%#JI_=x6B#pgL4ZlXfoBpqD2E9rDmVSIC76u5}3m#DCrfzHS80;AG zWt7S;0WpHqjraDWV*!+UZSodLtha;kz({xz4er zHY1|rvzcQ3K{TtZv*(qZkDlPBrx8pxGIZc`&tqdH6RcxM-@(JdEI#p%^A}wlP{z;c zymOW}x^O#r&(X3lP96I%b{%j-!2-V6IEBy3(BQSP4YebmOxYYL9|bXMi+2I00I{hE zlpVCXeb)(-L+|50uGsQdhhD(6K|}s!(zsSHN<+fH<&7hd8*3br%U_RHVqkXRNBU9k z#5tqpezKX`-Y|F00{a#F>W3qs9w!@)py86O_b3tbISK;@rfSQov&w?Kvb)n7W28f} zmKY>Re0T3Vg*^(lWy-dZbuT?4#yeIU6V0QWnF>gpilKP82S59|(VUU?6=8gmc=1|! z9-1V(`De%4s`EG>SM^7O_7jgrQc?-#99GWURIczmmWo|>G-N#9f4r}lvR2!<>yj*K zlLxV{Wfqeb4=pLWj*A*@x;No*E=5~+VQ*^4tY=Y=nJy^ z^m77dcX*kDCoIS_zgsNw6M;zL^1-bG3}A}N+ax`sCivk{cGeRDNd58FWX8D_8q=a_B$ z4|Dt8doPwP*w+laPe3jww8|~Or2!YpALz2A{4t_ZSz$)(;Gp&`P@1rE+WrX};?i@e zTL6+`zFTetA$E%MgiYSq_8(lW3y})gTj$P^Nv8piPplk(1f2(}Mkh2Mg4^gSu#ZHF z7KD(-0dVEYVX)Xo1XnnD0=|zIw*BYSCbtAESWBb_=qqRN#m7_%WZj zfuEHo?Gkl1y)vHtFoA5AbwokKu{UKbwwOsOWre~QyO6l z3E@Lxs{U$+lw$*B=?VRJp#a5>J{?f5?+;_&L~s0LUPap-c(nPU9P-6m#kW(fD90b- zU@<^mao~*Q%t?^L^2GG_mAkn*Z7H1*E9&e(-jNlx5WNYqw^twL%Bdn2DfQxYJ8Tqe z&rH5%r;$x4X6ot?9$9S~=Z3U0ndKPss+$C{+a)Yt9hwHx^?8zyGexxTY; zMW|LZ|jCn69$}5 zwWVi{{pBOb%8c-i3{7TmRP_Zl`~AH_n|7H581zcaf~66*64*p5`vIbJ?%FrdC$Vs{diNM84t` z5CdKyFyA*bWr#C_E0VJ5vt9Y7EZ&-Va)N5G|1`ImqVOo*ieytt_4_<6so~8)YmNs5 zQAdm*w!dGxmnO#Sz?$&<+#@Y&4%#GEuroUlHK)EH5!AmqhkAfjw=yvJ>&~p1bYuiQ zEBXVM2!DC}>Z|mi z#R2w`4SuWm&e)&a3;<*=dl_jxjN!a5Mw8jfB)MV5;9UMSwo{kX7wI=4)YkW(QofXTyU2aN?s05T#x6f=!+(dt`E z*9f+Xi?k58h8f;}BbFStfylq66qg)u_w-i!k&Pp!c{LHiQ6K;8ZMNa%?x9eft#zX} zh0Zj|Fi926qM+O&$PzE4T!~e$S=i9kQSb_~WwF3cwv0>U%%G=-oiT)Er1RDla&`0~ z=11Q)NT}zsQ;@}!FvFQ^W>uaL(|O&{(69@xLZt5$)B9aDE}lm_bBq+i;28?3p_y19 z2w}44?UvH5h?r?Rd)ROuJef6hDTKDs1tA+<@v3okDcAd>qgZ!HFwHLFFr&VBwSU&I zfcE~e15E-mpc$fgmHbO=+s1ZDDcHEtLt*2{nn^+835`cJwtNHG=%Nl!maQaY zYpZ95UPTM?^QWnv9=66XJQJ2Q;Z5Tp7Ncs~PjS=OqK`lp{bEV}nCJ95* zp;Wa^?zV$z`sEm|zfSOccY93y_BWApLRm9A(35hy5&a`D|Mm6#Twk5e z=KlSu4z_+!#_`muVmgrhk7A0p3&@S&oj*}P2wn~_N#W|>jrcXhu5sVp-u_+%Tj12W zARVzlOL}iv_7d-_~8KhA12TO^I(m^MoGczQZ+!5V5=1&YkF| 
zXTmkXjgs2mkOfxik?$mGXOE~ybaE`>i|$EnKREq_9o}E+FS?Aag&1jtsPT7=G0YP0 z9x6dWH+VGJlVxYT#G4ZJ)+4UdA=s4<;Yv?BVQ{X*212M(p!*BK$hfp2$k0*{9x_2z z7Vjii_gKQn2s^Qr8O+?MHd)SjTi@O@DSB#Wg^03iGYp|7!#RAyRAEn)Uvu1&e_v?p za>Pgcd#e>{CVrE5L%`RipZHJoOepXL{IQr6G)=RA z0-re9_Ar|S?+f42OD)0QUA~6uS!@1q2(^HW&m6Q9mWn49;){oXr-I-PZSl^7ue(f- z5xC=6frXd3g5Et9qxTM9uD@%y6lCSn_w{I^$)56%Zeb)`Vh0gSkk${A6V!U3qztq-|K~f(JKj4ov>xDTLPhZQZ z44tT$Ks;TRioBEOY^_QoCwVd>ESYr)BDQkmyPu?^rtfFfG9QTg#uT1lrs3sho3_TQ z{J-1FXFrC-L!7*sj3qMU@nt|sEKwT82`4xWCnjFkk2;8N|4}@*vQNt-N1J^uF@*EF z%0A?izQBr6tu~E8x)owSP^(*p*+C*rUC|610hYK{z5~~}U(yEg>DxQ`iYPz*26N$| zbfC}zir+P`@fItE9P_`!l=i}fp~)C-9%Qv)*Nl8tYSA-PKU;=o%cNeqi!O4SdmAbi zehBaUQQ5VhTUfyoDD0is66V>utN)=KxaiRz^c<_BijrsV10`w7n$|+cl1(5K@%n@t zHKSN;(WMm1RO6Wo4J#(*xz8{Wu62%p#>hh4yyhvx6ZO}Q?C5BvDA;n&wxzc_l$3wQw+AZ|tUP(Xn5H`<)zgt_ zz8Qx^bI!~NUdDY2k?DVa4f$GoD0`sJ&Y=EjZmm^9?qE;8`1ows>WZfN_n*iFi`UqO zSbZx!ny#f&Vc#WjmrT!S2eaO1I(2>h1=ImuHKDqnGKyBshPvHbEhW07)&zWyO!Ic; zW*kN1Tzs%me$E&*q{zRX9>Y|wtJ(z3K>zf}BI-c~2Nf%!1+C85Yv$;D ztTq{-jPsjiwhn51a+v!+lW2g0wVf>QFhQl>!72q7l5-$Gu-obc=D7s$d}v7KDPMBO z`Guerrb7+meH<7w;?z=$MgrVit{>=j<@yBnG#kA1}+|M(KVFK^73jaCZ!( zpTIOcw3Rwy*e95$mNAPJYtxo92tiaE5gBMj_xftFlfRzBS4mZ^slHkNxRkH7Ah7g1^0LPOB_P^Agy}VR*6S@;E?1s!Gpm3p!k`gf# z=TAG{Vjs24qerZv6`l>OkE?VMchR<_9imOE_BR+*6|&@7o%r&Bu!FV14dV%xS-FE2 zPRNSuLgrfXsF|k(Gk^AKlG8rD|8140+lmXt^eVo_q``TH;Cu0S3)(+Z332&;0OUVq zGc6%$t+blYgnCvoE6Gf*T)9WiTTe~6oEb@H-(jnhA;}4%60jMIFASx6ryLtv^Aj>~ z(6-tQc7Li@*XVKqY^~foH0W1xdH`Hlca^6O6KCjWJ1bt;8Y+HUf+of>(qq0s`hgKF zI~$s-)n_LHf|Ip0sIfpD)IH6WAsd0GpWq~pNY(ie{5i^Cs2FCfE}L8X{g53I;@42@ z-Buj9vSM(ppIlhDby;P+^^|PhgLG@WkJw;SwqeDM7@TZZ-=P3sYorbCvnN@X0%|zL z{s_^hw3SzctGf`_|36?Ra)5PF|Ne`{MfQj%R^tS7JpS3G@GnGj%$4&uTaUto?L#A{ zXhXfDii#)jO>`2&`~lvI=>SaOiiid%l#2b@wxn88`+n_?l};}hj6SScc3$zMj`(Ck zLjP)WeMr0@_L!scD?+Z7$5=C8=D%eXDEYs}BDg;K^eu8cn-numL~N4R1>W(u9Sq{i zQ8S1#0G=26%@AKou8J(5uReQL(VVvpS3lq(WC}?r_c|B9L@PmC!yN<6`B&L`WI6Pm zI0^#xu`@`h6YQ`PDnKZ{sa7Lh5x5&%Q5cuxXVw!gACZsD=2M1J`|-N+d4rWt8S#+Q 
zXhI3IA*?uS2eCqB;oE~C!!3OOQr}fWf9xK()Z`}FS6TQ_w&K1muSrzFXng&K8qr`i z!K@eQk0V(VxfLp5K1`&qAS#|Pq-K*2uT5DIHHE`%xe&f;Y@zIQ}^)K=d0 z)6Rn%eEa+k+xk`GoNT{V`*7jdF5m2aU&1j7&&u@ViDW`H9`9Ij)6cV1HRaco&05)q zHvam1P~eaM`PI*K{Eg{$hXOUblI7x?7Qy%Y@6rc){k>K9`S?Wvt2o~O|4OR zt#eL$51!iNI}lF4W6C|2xHgGkbF=jW@``%Hmy$x;P4`WzzhnwWBAl^A6vTDM;bm+5 zYv!Y7pD8MsUNB8Od}3se@hrEGBbylMdI_*$98SPGU>x;h%R|f96*|>l`^|Zg#GiJJ zbR8<-yd(6Si*LuTmzLkY%}~E$pK;05U_t1`$cq(82X`QDmDULg@=kUJ3vb?DPGD40 zld2QXj!!>-aje|DuNYan^YL8FI#xmR%eN;VbZy7wKhMp@yOV3p#3CkD``;bB@MBAl zSAzSVjKJ#hrg8tBa139&acG+kreR*1b?+=!fivlads-}4Px{$s&s#=DvpF$eKi{Xm zyQ}3^T2zFGr>h^$a9@GP;$TetH%cv=WDI%lLdLp7=gS%AVt^hn{yaHktcR8rIr4yx zltE{Q-w~RVQ(ECLst!T+0L!0o(REUf7;HhZ!?}LvY3Xc>WQiRX4gb$T5H7ZJeUf9A z*Cje4cVfg^Iou|(UQq!~!@VMoy3a>HfwLGcTu3~p=B{pb0ZFWuSSdG?I zMx~eG2#3+(bz(dN*7TQ}C&i`!wl?8k0g7;HO62~QY^5MGv*IFUV?X~VES03H-7QDt zNSGvxXgZ(hbV5kjg|sLq>ZcD2oK`9Aao65+>6I*Dg} z#V_EHVcPG-`6Me(x~uPq3z8VA1P8aTU~sAiFnX*Cm+FDIN#vi~#$LK7mj2kYar4)l zjBEZL?%o`f((N3j*YZv&NM>ia1)Mc+HxP(fj(2~jo6gQ*>&#zfrM%>*8Qj*ed`rx6TXyUIqaLAp0TeiFOzkBDRBFKb$p zmWitqAbdS~NUS%^SKrX+?tcw^&ReyGrIv}~R0qA{|H0Q^_MPNK+F(ahL9ceYJsQP6 zd~POBm6Ou{KD)bxyLx74wTDe&mxun!i{$VO6Mn#Gbs*06ed<8I?X5&hFnV4ppI#2% zfPME;PDJ8P2RUJ%5ACRAXJTQR~C2-uaXdo5NsMufSbeqJRp zSLMH7;SR0Q+Hbpxma6`oQorGgACM;RU=6`zComjNUO&piuX^%d9$7p{i1)K+Jel-IVLsJ}QRjDXr3(QuaSm7<`zyY4;~vJvQ+uM0aoiU9 z-u@kx%t^P!N4FJPYlT7n?ZPL-FO}Jnvh?yFQye4h5{E}xQCn_YJT?vb7wH|5 zf^`b5aSOJ7KCNM;tPz^OY2tBHQ}?rH@4x*Gc8LD%JitU;cdZJR;0(+#tH=vxP4I?O zTp~k|a{&Q&M^%IC8>R8~*eF(8a`fdPx2??D($OvWp}Ga(y&W3b2|I5eFSs}QWA*vw zc)24+qsBu+Lt7OT)OC-V^_2&CbbiM-F6|2{nr~}po*j==)z;d(_r|qr*Y3H^OCVfK z(_ph=vYKylcMSX2OT_nZP9^U{A3Z8W*SNfe;hJbZ5kYQo+||;#?(T)eV@bU^LAN#s zJ=ogQKC;ra=%K+|=ve@Ho zW;Q$DvX-D39Kh-@KV~xdR`5iylx(OKK7CVhk>Oz|xYb2JICrx1c67JjB9ErtZAgRT zBp=>8d@}Wte$BTY?(a-@JlHyyH{vej!J!40HQn#rTG!sDJ8;%-By%Epom@sn`^?RC zh+M`5$xw5*ac0)=2I%IZ@-bR}0rRWpFTFQD$fP?hwPFuk#O;D3<48M@a^3m-o!a%x2r&>?y)- zA942!pxTNrSFqH-`EOgSQhpc>M;Mco|BHQn>MDj`?=6xC2=Ejo|I| 
zv35{}$!8tlV0K2a>opKlQC6n2-o^&se!x#ecqdjv7W7?T?agvH*EJI-;0U?H@RAQD zY%|_>t2s*&i!+J)ebijfNbIJ%W`DCY+&_`INB1k$zmFw;gzJOvrVkU3#Eic5HKv-( zw%xmYvdnqmi-k6YHh=4HE$-}f@Y}1c;YRJvysXJh_u8lZw_B5eg&J4i{;H+v90AF! z->Q=)Kz{!4MmxZiv`8cV-27MQ$XHL%yl?5;m?w)Et}jC3JFM*66w?wTDJ_gXMH>=7Bwbp0@s`+Fna`F+op(=pPUIO2%T|9-WNBIe%Wp(mjtaxr6+BZ6vOZVV=XGy)bUZ(Y`Go0bd89B9`@txCm)zq ztcT6Iy(-4obrySUj`60dg4nL%^yzf1tCB6n| zD6$qYnZ0)JaPay6wjjqiF~pu9(!b=_yC$MY@z}Nf>iIhA=;d`viTaE)lWmr$|S!3HTNc2Gm}}j;xV(Gy?U*T0!13d2~lZ@JHd@KO`Nk( z`-z`*Vx;5u*?WDQltf0)S?(rd)87krw1|B7mF{ZUR6JH|woj{-5s>Ke$O+Lzzc1`X z95aVE7)vQH9!n|@c`U3wp13}7++f#`5zcaxMBQ!J(Z7S59&94QR< zB{$_0lB#lA7`GqSsPlIArk^cp8Nc>8*%F`3W{QmiT;2OHi^L@C5avoXcVq{g8wv+P zFMh$os_;g130KXyI(@R}HnT=;^6||i6D{Yx`+DAI-nfBpEgh&PoJ({J?*5Q7)}5EZ zR-`|jJnI$ts32;9--2EEs>Rv(LXWR>u3C!9gCK3ks;T8TS&t>V2;5yf=zfaBC#7GO z-Ms_e3DaXYdM4Srq+1@z+ip5rqQwlY4016pC}{7M*=d*D{XoOzQM;orvy%;i&wBy>c0Ag?xAY&KyR9eHGUH!5VJlkJhc z2FJI{Dfo3ovDEz`9`fIe&Jl)hv5`uSv2*uGCe1f!vQLqPQWZS+E>$R!6nl|ZXC}K9 z6{t=MKKryY*lR+S9}51+wq8Hb{@{O)?8;A?s|}am4R!L{7j`VMr-xd5YorP9Z+zhh zxz^~;CSKM479+17IM$A*R7fxv6i4ny3$lBcy%I&Xm44hM&%@*^N0N%KGky0hU4C3+ zsBFKoiw@NuOdbByQX<@54mmzMt4RBuk_5%+;+Un-|X50VDu7oTFeyfQiiFU3z6irKce@t-6u^h z5ED^P=h-0r2fs{g{!T3`aKI;2qlMqcxu(XAM2;qAo!Ez-nH*R2r#aPXwM&Ex4PeF7 zlovadfu95gH)i#mo$N_mHlN^MF_zCf=jWRk)xXsJMUgRuZfUfW0Y`ZJQ-NkS+Kq8ak!07f4c3JVY zY2v}vjn{OIFh=@vZXr9w3)BA*-oE@mOZel>Pm6XR<~Lflx`!5}R%*7yB;^_x&@g+f z+IU?TKP@AE^Ebu2CGVJgcZ#wvT^Zhf=s<3fr!TfR!ydxe{FN3zS~64G{~~0j4Hp)V zWtsjTw!S)`$?ki6fQYDoihz;^f}uPu+9wy=Qq{tQ4k8VMplWm6Z>? zCxx_^FKBaiUh=&P#0`q2+$y)sGpqi_`eA#6OSHPSU69^?a5<`v$lTls8SZGiM@wIK z(Z97U{vjxOLPxv8=hZ*^1@ic5osV;#%rQZDU7vBmMf$juRv_UH(f{Q6Ys! 
zG>i-flb$Vz!K8J)9OK$W$=a9XJ4RDQP5KJSw^x|+%C$LjDVy|R0+6hiHlPF)CFX8a zw+?~%{#u7Av+-N~plsfqMF`ZLOG4tdCOs7kgr~2{NHt*4XnVvdx?QOp1{Fa1(^-zzL zIBIzc{MO$ij5hFlaDLkBCgNZbV4f78>8B5V<&^5!=rM?!-rU#|I~UXn+< zulV|i=%nl1dhswSwjy~f(doM$+Wo1jIX`G?HZR5GhR--G-9uOR##DUaOD;%q>j z_;Tu<7|3G}UE@BA$+})hxNYXU4Boe18ZdTFh+O=jpB?;kS^pwoF>c1*(=_q5uFhZp zRrPvO-MCP>dXf!(t%frOrp zLEaR*la}y)BW=nGG4e|(ijtp`MCD7d3mG%_4qi9jr2X0+w(Ch=vj1D?2ElZnY>;c@ zlAUYTABV)(T%X(xSzp;k0%IvM2V!{b9VG>nw{$m;(5`iWQBqHw8gcDbxR9OlGn9_m zM|Nqj#+0epm0u4~S{0)HW(+sjjuB$h!lE-;th&HQz2~9Zaw@A~9-*S#jIRL=sP9zv-;^*pK z)4ic|2o6%rcqrO_PIiFKWRX@hS~nvXHuy>>X4f)&?u`Kzf@r(=$mj|dq=PU}l16}R zCoZN8lIFa&M(V@7meD8L>wAaW?Fxj;h_!N=l>)FkZ1*MC9osI9q$3PKnJ_Mz1j^@%WZLC8>30DhGZ8oW$c@z6 z^=CkmMrD`jSCiY?`sY=yi-30Q`SGw7+edLcOAYB<==+k{D-Cm_?Bz-0Nf`pSOT@;L zR&4#Z#$;`bKjXn8YF|0eRDbxm0T`5r*N$QNm@hZhD6~h$s~LMU0+pasgu6D*Qf;yt z#2Zvbpu0HJR@KSm@D&E0-`@13pTY#aMUpq;Mh6IUJLvjA*yE$rCQrYBIBD?@qW*Sq zv5G0Q4$xAnmTvFY1Enx%PyZAZ(YtgB6hG|=+MJ^Pa_!e#r>xQ*mPIS4!oDtUty8jD zBu+_7sWhGXfo)&+eAL6l`_e?z-@fN0@5>!?(65xb`AKuf&TV_cBoc_}`8G3y0$nm` zzM|D9`%jrbO!M7tT{8QVfNkb9m}RO+ujG9DM`kdBDN%O+K5b|sg^up8m4)jgQ#A(g zince{JY_y!D>)AZ{kye)|Pxy_kca3ls;({5v*Z zw!9!ZElNJ8vJYu%6PF3xT8fwH6Eu$sJ2V>z%zIt)QGCweM zanWQDT%()HZJ+7~>~@@bt`H{fYmfhfBygUuse%sp!=Ulo^uzn{lk$1GM-L(t&@qBv z?2<*5*=}u<-BkTG)>JpQWU1Rf-0N1i*Kr5HDE!dx{RzWMEkY*$ZZiwaS>12A5|N{0XA& zy5)ktSrZa%IdM}pqM#d+Vby`S3)Yac00YiV5Bg>_a(ieXfv(n>0F6s$EV6?IAX!Df zLzSRin$2=K4hb6V;8LPifiRvjrKfEydd~O7ff>EG!yUI@pW_V+5p|A>;t|?odT6^Y zUGs^ykNA3T_7g~?euR>6^Pe38%I!&Dr|9!W2L^dS&;6c#QP3h8qF*Mvj9QY#c0K2r(f-?q)C-yMa17_r!mD(%(}xh#!Kc@y_W&W^q-5@VWS zMg<+)et($FK5Hi4mW7)?5c^&9G@KqE32w>T*04wX@gHl)7bkE7m)(c%f(OU0f*UL) z>Uw8UgZNzpN+iGjm$UzOi=bLm8i}{OG`ap$dvDyU$Ozch?I-PU{I8^59atcZg_|u= zT`dT&t?kuYS(L#qY%MdUjbalXuW@@Ah|McrJ{@aS>F-on++a_{)r%XQKZiU8>v$QG zVdqbZ7O4LvFk!sWrF*UZd3?vfO?0coA(r=qdEFCMvlX&(~k z7P-(>e9C?Yhbx(AIazse`svN0d!5742%ongF%2RtdyU~n8bYi5l1w*xxX_BtDo{*->#gJg*0 zroa9ZW1^337rt6lD3~I=2Ekom68WVhl!2IkS@;S1yQXh%lJ4$jK6<+{N_@zq(ip&~ zLoPR>rNu@Q-_TTfI<0>QK)?c1zV 
zVC>ug9j{jG=4$bWW%cMBDRS9;7z`R4#c|vK_bDXbBoa0|_K~H*a0&>RG6trO&**W< znW|ROBWVPaoXr$4l0ttCIS`LYIC>1M;Tx1L6uy%A$FOoWWCjjR?=VKe7r;nXDdXg= z2?OW^0D{i=ou2q5R&K!IeNiOhOTxn}wYEV?@pB?0xtH3IF|K}dJxkWw_mnFJEvXkc zIejF)yR=QP|Mz4)9*5e zH7>S+zO9~~Y3^7~v4L$VkJ@PWd ztJ*SP!#`~;9X2oo%8H!f;gytJpHx=JM(5?Y(%FHEqc0c~#>lh$XVhSJl*R;B;4&9l zdO=wd^d_-IM)Wq6qxIG9@h6YW3Zv4`O9}|0^yB2gM!B;z%I#T}AS{+PiO$#+{`}v1 zdjIa*!4W$n3=^E}FE&4zl;GR(w=nft4ve zrpwufZye2Z@@K>;E}#63g$D9~dSliLP{hDw)2G_u&Q$tvL73Q$e+t$YZ9_M?PMp6MpP<#?$u=fDn9~(FWgYgBII}q6$O=lEo)~ zUhwy}(xY_F7racaE&XVo!?MT(3Ajr`>L|DS(s2ob$F2kw9IsSwZ(u0b%Z*i43+qp` z#rFTrV4g*F#yp)0usPFi1EZOlr!>({Co9-6X1TBG7 zQ3L&w@?<5wS8+YXR?Tmf2lmek;cK@R`#zV54k4h_MZ@U{sJJJm{(?=ApVNb(*KLUg zu?u+lQP7EeNg3{8_SRA=T13!FgGk%ES0$PWLi>>|N&%LTwZF+nmLcN1T_eNK>B%1x<2wVfz-pKl+o(a2%;iVaqz|aX*SXpdRQE|Xzpm9{@l{Jr zmelo}K++HWQpjZy2g6aA{coAumok|o%RoGRdO$eA_=L=qfW5d71Wuq8(i3YKSNmHD zU5ww=Alr+#;?&{JNp!lJQ0l`MjV3)T`7cZf&xqjPBZBV6@zCeYp_w~A{VvEynhZYr zEe8Z>lWR4^YuZ5=dQsP^{Ey}|+Ch2`ev>l)YOEI9n9VR7^ zt|`wwV`hAr7ZUtf{}!Fj2&ev{R|ApT6$yfkqt?{G8*#Y|sS%;4eH-pfp~^6R@{GZh zfDONpU*iWlyQJ2ZecWZHUpsSSzEuS#RufC)`f5kf7cU;}6Jq}6eD(N|VUD{`g7+C8 z3RM_jqA*=~ej+E^bT8|H$?#Jxq3@5fGWA}5^7P2<;D|k^ZoJs@6moKYe5zA~+~uL1 zUT-im{R&I;9~YDmsz0AFXB(Ka0^0V%DxdLh%PnIgj-rDzCOhE zSeRi^Htjo-mEJuNI{u*0{JbZ&T{afHHY80FuD;xz za+AR&n1d1VSN3q9RLJErwy`wy`9jAaju=d0iu@w=UPG3SDG*xY=)m)^LhN z;czu(mQ-E=TLJ=?7AdXF5P{AoiDy+|gx!Pb`N1Fx{{t4kzorfT)On@;K42)H#cb}h z$ks28!3~^{suMA!xv^**wsg4Qf~&#&sFAAAz^AKLrS?cyBOwoHc~0Sw#eu>6Ms*18 zs=oE=%2oJ);15QXrUTtcJ-p0Eb-@AJAbsu*J;@W2jj}a>{}dIlG<}aQ;*X(-&3`!g zvAf?yPYk@+rXLZy_U=U)F|}|ZE~0Wjq}1~c{9AU(N?s)4_bit%s7?+y+HJYI!jY%i zh1uz-d3PUO)P=zoIJ|y_8bxVbBwvO=qxJJSE&hdScE-E`3YBjaKJOH!XkjXSee}5W zmr>>B4zG85P}|V)5b$P7H9wGk+4nN0M zzVwO8hni;H&9VK3KQ)I(G_k4&A*Q)u(i`h}q|gZoj2Re&~PE)^D^~ zG~W}L(tJ#(zmcwcg97e!aj=*E5)Jk>;ppiBkH?gj8;|h!g=9b$3ZDNi)_)G zI^WQLcWU={nB0xX8;`<_H81Y|PNpB^JZme-&Xc09;nJrEc9n?|uVhCVkaCa`c& zRu9Z;HZD%*_~N&FVl#XNF=m#_Z;OOwz&x{1d8^10H4b)Q8W=j 
zffI!p6EwF1#;-Jq`Hs=H>vK-BpSh^aM%XcexahJ;8Jz_egA9nTPB;h*tY$aJYhzc? zgGLB@5l#q()iO6=;K;OeD*Qy##(ch}`TJ6W5SvBEAeg@@tXnlya>0I0sA+iqz0bmC zC34tyS=iD_j(h?FeQSA$L9hSxl>Ws_9Hy+u1a5J%zTX6tW zyv-T+-^{zlo1mE=&^y=6G%%A+8eif3%avHs&$`0-fE&Cu_siUAj$RCGoT-Vo0z(GR zj4UjZwrdGObZz{ft){yT-CxNA6Tfg%FQ{g3P)O`6`}RR_WkI#>dc!O4MCZX{#?4l@^pz1TwPNKCX2atN#%bG-s%9DJO|B}pxLnTeJBh(e11>UT0QXYkWqgx zouFOBrH#?3@kJWAFE5x1MRNH6&aTIF^G8_v@=` zwzHQFd(aXKaS;FRlei5wfSqSu6fGa+r%R}$THrP0y5Qd%Rg|B1U zdDO2*ZJb02~NyNl_s zd7El73^S~RltDET%rT1#oqfhl5ZYHMGr%0dakTTaf~gs*dt&-2HxEq|`W%&Rs2-?GL!cW`cXwqZy=MM@ ziz$ie5B=9#`vA?n4eU7z4M$qCBkkD3*IsQj=uj-|NFo+ia&H@K0cyUr68Xfy_b9El zoo^&hJK@b&B&xXZVS0a>20YQEEq}J+r)h?nX}*VKy|APduo}^bf84;(=6KMC`vaz5 z&h%HtegH00YQQ#U9&H2x2E96?D-$wEKuQ~toRK52qk)^LGx<|s_b=|NPxrqN0M6>j zB}#2>ADO8iN$m6OXKCxp&}acyB>x4;C;!t8@KTh(iVa-yMm$)Gz5j4WI;@CPbAJW_ zRKK}mulw5Ize<8WtINBlDC05oRc~wGSar?EqZ_03f5Ci=#Pwg3C()BmX4RteJ}*8fc3`s3lBzR>^7j?{P&efdw9g3JFs z1IMQ0NTVd{(U@C?1YiYHbm&kh|a z);H*Mgh|LW19u27I$F@9{e(BKRIHB+xI-TQPjN;)O`~(^|9aFE>a&qtBhOM!#Kw51{P=M%YXY4gsCy za{ap+N_b7KiG7#U(Y2LCQu1NA<~<}dv>0Ue8*HXhtKxTk zjLRr9*;WE`mzG2lY3;*T)_7?ZUsT4A&B%09O*B=SO(Dqz%>o07!sb@vm@EB!ME;%u zkL%#&N@jWs+Ylu8^YtyBZ!d!ewZ8}Qh} zaQBo)C+O&kqVV801z!HI|AH@hz#YG}6-KxVEkn(h7ce5|l%2d^BE z7=rmQ`o0HH6ruOBDb&}T@=1omJGT-31CLL18k~!wQ??bh%k~Ehd{8Al`*p7 zJ3n=}Q9L>HNghI>^K_k2GyeNaq+P?eL4mk*WS*@|pRyid1(f@|stvYA1sgNl z3;K-=HNSbp&b~wYjlSeyk6akT01f(}q%}Pqfe7}mxj$+Z{9g0t0fd6CN`pWA@BP9EZOZG=2vhLD09hyZ$j^xd$@-7q)Q~kN5v#X_1~KQT zUQ({fx73_3&;asu|Iq9Vv7x}_3b`$n_8?4%;{cma{=}$(;f}(X2Y5IaApTPDy%geW zZ+8QFDf3a1;wZJy*7Mik8jX4YIccqgsVfL@_4K1;%fyaYsQaG>EyaW5cue)sT~LI& zAtMkz3NGViI20*^LqM^UFk{d?^)P2l@qxKGXPmsTjYNmrJq{Kd=P2^tvL)}dS z_g{r|xB|2YdfVbcW~`4bC9GfPW?B2{*qe=Khv6nhn)1!21G7z`T_=Gm+iT7JE`K#t zs+58`8KnhKpyn)v-M)}^zvl3~<1f@f7v!MW+3|dhK8(!5J;3_gFL^1fiz%MDX@C+L z-C(knM_ST(1>9`jIc;2`HTOPrcKd#w%?I#KQT=$mORz^diZhM(DLw3K@{)?~$HM$w znrr%|1@DY`;g`ZmmHe$M)`s;Bwgo%+a}HHEcgjdXk~@mUXj{v<1rC8sE70dlspZfP z=X5$YXnww@6Y%GlgrX_E(ZVbU80D*cnR0iuKSGb8D 
zKg+*%2K83e(AWBVb&<2FW)opKG*MWwW*`SG4d`cO3Ml5EGjnnk0wpy^UH12m`LY-t zfxTT=mt@`b7o>puLHc9?ZY-B>wfg`;K02rirQTL$nwciIy<&SN!dU@rht71w<=Y#? z(=6-rHi)6^i`3++bMEOLb@58Au76d(A{JuC>I)=3Vnu}vX`A~eYdtcM$iQc`{6{bV zO$os5x3h3q%?1}hVq}rj9m-BX3?ka!tw?00Dige7eSby%3O~)-|Fhtvd32@17&f)Y zXve(1089W@sVIR$y~9RF5QFY7q?P;M%&{pI@089@LFlYBYM5eWNZYeA9cl~D564<` zOUF2_BsjZpLvS_QH9dozA{O=|Z9uF8b&l5U|0Nt&=m3wn#5-a}BZgP-@hiYAK7^Go z%k-8DySv|dC8r%L*o$XN@}H|mC&`PVthBc(1$w&SN&cfhx?J6Aitv=q*k7@sdQKIQ zpb@v}M>=R%-I1)+(9o&?mLyzlrsip@WS#upFP`+%_iY;yOvxN523~r4QNToTjO^a4 z)=<_7t99wLGpRA#;{sQ7+uC$dr>2Sh!<`)O6ap*MV8u)ID_(M^H=_ zUGw}q${SVd&nh0EQ!2fRNM$m_qSdPGcCpw-FI^j7OS?3_D94pPcx!FrHMEWM7R+?s z)2-JGsCl(%NX~@LxdZOE_RHsOUvrLH$!#lJe&E_*&Uas&J0ifC10_*EaTb$MV_Ru4?Fy>3v6oqt&9$LA4Xie zV=uo!-FYKlv7R8~DkV3EthKIm!R^3~P+3GYL;N7nkrE6yfcwJfeCs);HbB4mb2(P= zU^pHYWXq)6QjgX|gZbgA0`8PMe?khHj#ZvaX-5dSEo_zlLJ6)G!hOCNC zMqriu&v84DCrG||nl1=(rRQd-s46y}GZk73w3(3eP*(HF(ArJaEZg|4Qgp2WW{gcP zKjqOQA7d;Uyy{}jFV1c7qV@79d*!Y_cR;?YIO6S`8;<4Zz+^b7tKXBpLdA`9M+DFR z{7Q=8I6yEcT2R7!fX)(ye!QHn%FcH&)hhwYZSuNbG#WQ@ku(vd=558=wsN3B4{dCB zlM;2DQf`h-v9%Fba1o@hsVrLHpt#jAtuJhMJpwj+@rz?eAKCimNRDtV@ByF!+J$?DF@)GaM%O>^4z(~(gLA4So@YTBz|yb;Zbk_VzI z-uo7q-jFC;C~GkIVf+s5Uh}Lv2b*Ez=nDtuL;=ZNr=0RwLBPzxO$6cK=7ZUdc@pOe zCo|+&ayYY#J(p8*{`zNsHfGG;+B}>08uyUobh{5VfDM=h4ov>3II`zz8hDQ&w)=e% z*==(C1;U{@dco{h^>GTFvR4+vNzJZNh#T}pAN&gCCuo=!6^~`~(~{2OEi&ry*aQ7$ zr-yO%tqsnB-!f$fdKM%`KLl97r=m7g%TI>P*`?CPjc`+M(Fn@kXg^occzCRGqm~qYd=aloB|sRrO0E1 zsqaykPGvsumTmPnHJ;7pi9#FzB1PCly=r4Ubg5BV_re_rPKv(R4pvsJ3h-~X3#k-Va-j3#Pbdk#Ya|wbSj~8<|(zbOs#Rj$XO-pkEe46*(>IHLuf4{## zM;T#$F?92ire*zOx+ei*G3eMIuhlniZI|ZzA;vs=3&|m%JWmEXNo+|<)JEelYJh_k z5WBNt?Ty`wwr%$!i`Ug^H{SB3QFU!(lft^ET0 zgnd@kg!jAaABEhsf{Ao=-U|8vx=69}b?p@}Rc$iQ+DapTYonCuwWzc%Nr44bk%mrx z6S*yYYn%F7TiRp`(oS%emm}?oddW2R1@(6u595y$!K>M7jr|r_`|*lfu7M4KCkluW zF(muc1;SP8n6Jx2yvd`LluMFfq`{*~Ty^UQf(2iIGy#Q9t zexTc6Q@VFM2ffqlP0$oF$^fj!ocFsLdOTOQ9rlI>ZmW;I#SJxWXrG>+X1IcI!wu%N{)Wj+ 
z@XkmjdwX~zm~|I$->NB@l@opHzxc3u)WgRJ7@P4d=2w7ht^lEk!JXpkv_~B5W>PN4 zr~z7(#ZdT0TeKrpnZr)W-5TD9U&N8xl9cUVZ*+trD8wSn*d)L8=FQ42Kf&f-t)LId z#!?+LHABn0CM}0CFGno|TA|jX7llh-W%P^4N6lMTKlfu&i1Rf8RKtQal|YBZbO{M* zN79r9ofP2uJGeSPzgKS*7D)uhs+{m(I!Sk=!TNeZ67&N@_GzO zl}Z67_=i)TVik33GRmo)S+U|KjYGrMISGCQ;MH)(8y0@IgE7OS&h>7i7iC01U+k;_ zfBkood7yo>R(g(Ft*tHL&DQ3g5$1|_vBt3$E$eK?VJ3s_rlP`f zRIG5@Pl-#*Vydr~`$9K!bj%#8Dw5w|W6t77ezm+Y(Np4t1mD$nui_k*$=;p{eVUC1 zWJsGG_u8eq-En8%eBA&U&-`ihM<T=Ye zU_s{H_HwA_E5(A6F-3)_=DLQSl}cYj;43C(#~JO?80z^EZ$?PR8pb-^I#kBLV@x9> z+l2Cn_NDcA7ffh#$s1I8OZ&n{YLlg4>ZnFz+Gseeq2T1M&KJx9V@ksPfG|) ztS_$_Jo-SXB0#G-RH38ABT~L>r&VSi>5);4{Q?u92{r&4@7&BY6@6VyA7Y06q%7?l zMMQen-Uk&q;btE52O$%REfHIvUi|=b)@}qfCg+C z*M8T4V32|Ddj<~yC0{Chz~FUuMtwg;OO1fNxr!V(zgf7EcGy)?&IMXwUMq0KPp~w! zO^O)Gq5bA&OKs&+1hUnCD%t0zkcomQw@P(_q{?tV=G^#ue}O?O+eQ<`EsoJ3xWUqn z>bs4fvVqB((|c!}Ybv{x5<6Nydl8SfF$?rZ3+$+SjlL5|8;Vuf_Q!jxn`SO+LhHuZ-i)O8!(hDFT`gZNstrWb7Ga<=V36TK=xusxd{TFgNHGIiP zd$-36Lx}nEFBvi6x5RXqeWC15YLmh0#Z9x~I>~LhO=eBco2Zx*7TOCcpCTT`WLITP zb;q1mIk_IzlyN31>GPwIBI+lCMcK)wkLQS|cHv&pV<`JuMTpcbkNSbS8`nX2BSBSl zO?xxvdwlU)%HJT&isC49Wy`nxl&pEG27*00@%%_@JfR%b>tpi>C_Qxe@XMmR z--QqVlo}4cU;Oki>N`*8jq9S()=lM2nB-Y&$beg8g-dBL=p76eVc|Z1tn_#wrzO8b z8ruOesc%Yqyl2#9;=!%+qCy*&`fY z?RI_r{_YdEFRBm#a~9+Gyg_SE!*N_phxFw>$xa+ynw7*32{w$PlIVQwQmL5gHsUa1OxvD<@Nr;}N? 
zE)i0vFFjU@#OMIICJaXV-Z3>KHOAPcGY<$N6?iB zwY8Jbf6W`jq$`DK1e`;N>1!Mz)%yhv__3)^BW>%Sw{#tySIc!pA8F-gR89Z*U_xf< z-M_FRx20Y?!Y z`ZiX_R3`}Q8a2|reqA(#r}%*s$|OWG#Bi?ai*x**->9S-ge=cReTv17AkRF=JlasVIW*-vXR zrdZYa(?`~N|Kj=Ur<2%XW->A6VBpw=*53Yox$_}MX@>KzxvmXIS_QaVp7V64+`YO} ze(AtSBj-=;nyYud1#)^12R+Lil|ftV;ypU0mpp61V-7T^jdl%+cZh5YG4R=dAc2Y5QhwdisA)I%#Jbd_^78leL>I#KP z5FWxmwck3EGL9p@k0H+z=63+cEE5i5!gT`|)U4f#HCIP&$-XY?dhIxc)fh5hySC|I z1sz&bU<~l3mMr%#WL5!>Qel>-c}Nbks|gF4S1tzd=2B-TGTqfM0U3p#WH!ql9^Tia z+O({XXnan%`;N)3;aKO~<$mc98QfKD zl6-1XE+6k@WyQ6i)c) zm0%unCfF8-q#d1rO~^`k=3RGY;@v}Nd7fSYon^NRF~G3kD31?Sa9-MHQM4Y{x_-P8 z8upmLw)a6qp!kE89muadpZm_{rI_aSaVDT5NG`~u8w!8hO!3KCpX#k?F~-;_mc zPdcXiM;d-m#GExAMR;ruzOw^r@wuMcV$Z|Fz4@pt0(*zGMCa_zNQh;jiQzjVn)`9$ zKW1kiWY*`lsA(F?y>F(;V2jSU>ox0GC-x(nuAN!6;BsqTE6Q10{CMQqR(;LSTS9;% zQw3L<-lzR@uWS!4UP=^wi;mfR>v<+xl1@KI-ilz3R)`NBsI}QS7-^P#ui3x9Hby=6 zb!W^hDh5daL?Uww6=9b)1+mv9tiMO5cAu^OIuyTzqgo(J?_E$2c~<;+8CC|kidF2a zpIoaoyna2;_Wo8_j04|?31_SdX(mbo+4p|Uc+s;J|F zhBWVY>wI%&s$egRW?eY%ITu#E*>e`n%=JpCBK($bKbI=kkw<%ZHII}WTvVSUZ<#-Z zQ3Id0OpWdt)nD>|Nbqgl7POFcI!X(k50)%GR`7NoUv)|ZvSCBfLvgLX0Nx!7Ok=2@ z6LE-Bt3w`_7HuXxCA5geQuEE>-5E@Y*UrxqhsK+(hE$l%BY9nIKeZ%iydEuI^DlmY zYA#lKf8#+Rz`bcU%DQ!+Oxx(%kvRlhxLAA925WJNy(4Y5;I91hD>z}@qb*YrARD~cPXz~g zG^sD=rNWSEC+i%u%l>==zbD7$2)_W_<4w)tQv#GbhwM2d96M@ogv}o~JuiE1%;oT; zV!iR3A>tT;8Li@D->-`ZjlWL5JjhzywQ{&gWqxP=Cr+)Adql0sXt@^XFP@0(DsF23 zN=%3_U2NfQ>M~utAuIHbpUH&>)+N(~o;Em>poqcWJhHZb4K=2m?dk|sl$cOQH#Ehv zxF+4ve1__7Ip21ZwLyqIU9aH@N);MeXjN|8@|L0g`9JOlZN(C??=1U8?3~pVnJXVe zSU;Cz`<{Km29o)14seHV+L>?W{GAnIJQut)(6qPd{I46TjSy6819%m{&Q3}BgJ&&+ zZaesv;%wbWyN}W*?_*BHB^4t&h#_pnEd1qV5DZ&IOtCQDvUm2Pkl*1=VSJ3x#Cyug ztj9f3&f^CzBDxS(b6I!(mYXn>2w5#?I^7qP0TvNc75v3p0caH~;Vs#YN1h=B9JU|_ z333}iPW{?k_fFJ>iuP#iMSWVpXXjggw|vu(udBpK_W4o|05LP4)S<=j*hG-x=>*2* zlIeESg%)%?8+*CTws?-p1mvV+KL|yf&W;kJnZ+rmSj|xnJlE5ZNiHJ$ON2BMm(JPX zSNs~As-Ld5Ex-mp{}@nhyYRFD3ynCA{|x-#vEGYf+P*d)x#6vWDB1u0a}sy4`gEIP zcd2-^oBpSzOYz0Dk6PdOA}xlQar+|q_SGwXT=UEPJfO03g_d#n$jer$ri)OWQ?Jv- 
z$NZix8pc`v`>7IPJc)VB;*$f$q=Lcl_YbVOeV%5Q=wb(b$w^+k#lYe#tJX=BfMkF$c1x?P#%Q%3^54(ZQKe8H*%l zK3nG|Mec1&A-KvtGK6*_s23(IuHyV%i$IZv87sG5! zSnTg*7JKh1QlWQD#6wn>a}|9W{SCWiVz>fnk0o|)7Eo9#T7D!coC*>`sw8gO9rY0M z34NWI$hy|DU}w^d9oke@psE(1YVwb~)RL09Xx(n=GA5xvh!bJnXo1Ldjiz9&3kjsYVF6#8+))e zLch)=did&I7qMX~vSV{7O%+h$QPquuyT0bo=_}z{dY05X!!^gbC9HEyO_?ZIt0BCX z^UcPax(h6dw$6EG1ogvJb@d{_eCf`<`9D~BCfq(1eUCWOh8#Gq=*|=yo8F+j95$o8UiOm)(hpHA}G9`XU+KV+LY)x=s1tVPCnJ_7`wk@SttK1;Zz|xdh9~uq#72X7S(c# z&+(0{^(att%>XJe5t#o~w~uOf$p^dHAqcZ%*V&uui=8yydNXAv5!I4t^L+k-xRu4N z<>dXN;XM9%_YPEq-s>BRXMIrERboG~uMRP;kf-<8Hq2hw#C~oCl8Y5(Z7wxETAL|C zj>?!?I+1wm=5^5zkCuRUU0CBRFdM6;;q&dW8WB2}bYLU!g@M$quB zN;g!Zug~J!J=n|<{~&>iBICX8SPADVxgx5_aTKiK?hg&(yY{!3%3kb;7Cq%}-@9?S zuhc+`z&P{B_843nG)nsjPlohB!oa+VtPqfjhhyL9kI)!=dQSt6Ts(B_~ral}j- zjDLzFRqUDDpbEXDF_f&{q;U+X#BmzAFrlD zob?~CUbxm^efhn_xbULs@-&SXG{_TpKiznvnu>Kq99NaNPbNURnK@7GM9=?aDidKB zd>Zpa_K`p^>j6RS?Q0T9Oxo0Z@x&8!h~^6khtk{Phk)c+(TP<-yZrDow@1(36aj=b znAli0%dtNdakiLC5-gTl+y2r~Lb!GI#G}zdmxl z$;b3$PFVnSXK(fvk4}Y!K~R{`-sl=>W!?kbnTfCQ(IIdaSN|X^XsL#WzjC#jlbsD0 zL`p;8>8SzaIfHu%h?sNMrg`&C7)uD|$_XL>c`G^V_g zZ~qKTd19D(gWar=wgj6)++C~r`-F@75_FkIiT^3j;UTUI71n(x(F=cDtOVtim`%xw zBwln*V%{)6Qguj)6OlP?aXHg7^Z}Er<_RF{9ssReYr8)80`K26qP!_Nto(}&;Ip^I z$2J`+hTO*$)kznc_~8{HGhw$Jh7+FDI2>z3+w zL9HWJZ@l&_J>v*db>$Kj)GRQTU?+>2UYaZqUf1E*i9@tFT0m+C)2w?e-vs3^l+_zl zOi63|%S8Ee9%LZq`cI}g;F(aOjoMAkSv&3HfR^hdJPeB14tqfLMlqSr`6M2jwKbJk z)i_#8ELP{e79^k8hY0M}_!w09h$gWC-K?#JaM>clq?6=~!`_By-X9%`pA1f0(Z5|& z&xG?YK7CHCxV?2g7J*@YMCSXN8y+6mdvJo|(+xPGz(eZM%6>K^uqV*^2hRFs+M{qh zV0Ql9j#fmhi#J-u)tSlum6DHP&k*Mm9YK@p*8`Cu_y;$dR?^S?bg`H?43(9LaP%!Z zvB))H!Aczwv1l6=w&)PB^4JlI`TXE>r0$l=tJNFwwYArZONEj97b>|D#hth%B48=h z!wjZ5Wv2IOnHL2J_6&%0v>fhF{Ja!WglAC$J;Qb8n9^bU`<+9f^2JK2Y{i01MPTRc z&n7DRWv0{0pv2}-JLT|l09vgewZBXNv`<7INFFxbra z&ibK=|6}UA$Z8rRCauSVho*vMioqZR!0Pe~m( zcTnrho@$&uD={aZyOih`-nEO~8&`^-DHQ(KKDX znK@+9)wV3Yf^jyeEO{8j8gb491YFXFvho1D%WmojIa>K}9W5I8uq2T@B;h&TR{OdU 
z&>iPj#ajGqLva)4dxZEOAB=1EuB@(p&e|V3!+7;!S?H$65!vb`7Jt?+?)R;T|NPyW zC&QeaTNyT`9`%wTw-QuxA^+?1ZFqqmwV=1S?R+EIlL(Z?yE&1I4KU?AP%0d{3 zud+-JclCR{(N5XffP=C}_=x9nK#T%BKdoo_itALPAx}NW&kmqm&r&J#WnM^FDh98d zt1)sSK}`Dn(IUm!3`_kNODLWBpI3XQNqo|?7m?|KaH8_* zd64BZv3FE{H7I4n$P>`0WWAm1Jl1^@+qi36q+%#|_=RJR*c zoC|1_!Zot|Tl313j}ek9$BI0L(PDxNRvf26eXX@Q)=uW#Mc>>fuENehthFKHSl`cdW7; z)~+EJ>ZTYUiajS`iVIWqfJjxq*9-MB)RaCd9~*uC;=nSqeCWgZ1rYarC8wL3!s%Y& zRPvvPIt>&GOZgg*6wcqVDa?xArgks?k*J9ss0Hl3GZZB1?=T-Y?FSAtk#iD%)JP0` zAgJhkU!kRO%_ZViSkRQ6mMel=lw{pRWRY?GT@6gzeLKV$W6tZ+T+Xt zoHE$=%nm;j7^!ZF3eS&~yk1PVu`0EYHf(R*CsFK4hL})Kj6xI!zPP;?JUmB0p+mo= z{*v$CBwU)fZqadKudA&7z~SF6&0&1v)ONb4$X(1cv^l#NPa;*&)PQ0_AcxoGvWNI= z;pAZf)v)lpan9-UF@$jV#9^rtZ~>aKHQ0eRvnfQt(JmC0=JKhDq;`6K_QrLB6@vx# zm+R59X{*B%&9WOd<8-+;qGYliPYH{CB&E2M|9KKwuekaRx;UuwHm|cRoXz6838>H? z{M$zAC^BTEidP^n8Qs&|KqRk4U_){_WPnwQa+`irCnjxz!=`hODFeVN_&jD7?ImQ9 zt}VPTK9Rh?LU)F(rRUP$9ZipE;N)-NT9H zTgF(&IkS~B)#(Lm(}fpM9G%NkPd%s--=OUlX6zPmN9>}@ ziOGN6f2=;UM(27!(b=BIo$mr{=T=hE){pL)M%Sb?o5VQ-XDPh9hN3yW0#M>HtKh3q(pv*T^(BBOsY_N&-SP;j^G<%W?ojIuLRJll{X2MY9ln7ks-$Ccz2 z_|71#K_f%W%dpJJm)RI(he z06B@HLUg@i#K!XJa!0bKbHxkS!Ke%uR(=DJBf32bajCCY1G`4;u?sxzWiivu=Oo^1 zit1|4>6aWc!F;V+Cio1ezGW{ckD%p=sA9v0T0RcmI#!v>-P>BB6rA_GsxYCxEE{qq z;T*mfBQ9n>F%d}%Gl5fXuvC+;9l(KR&aS5vh%PnYYF%`zLbvebU(k7$fe=o{0w@wQ z?xV3y`gI!18R(2qsF9P>1~J_^Qdsbl2mc%kJcjZhZL3>2(Bs{i@U*G0+WZ?grovPI zJy#aa^HT3^(lz0StHhUTLnvH`P{O58D{QNtcUTrNa<>bd5Bd2J{0xp%QHvUr_`y~6 zl$7c_b-E|Wa;oWjcY$7t5>DV-roJhugVg<^OZ5)qB&J&>Z001lkZ>?B)+LflWSFAx zq*crGYX#+UP*rvM&LL%Fu0mz7Df7ZXN`k>Iy`u3&yR^_g?}vYFkYK=N1JfS{FwBM` z=5TI?puXV*<9{a9I8=9rkxJ_G^?{yiTn$NkMe6s^3$68Vsy59B0I4xH9~Rd64wm~> zVJ{?(!;9z!of}BpUiZCDs=mFTylVT^A2y#gOgMCwQ8+c95a`65=}Ma#C+zd$n+Ylh z(*$etPg*z&oC)$r?#gKA5DjlmSoS}AB=~Zl&%#CFk><%&iMj{3J6bc&1Txlq7tDV^ z;ma9O@wKAInG7sAP;{bevxStCY|w}ycbv}ecic6QI*v4#JcF%9TBWDIa&sjK*+b02 ziOtv0sUr~)bk1P($aXslpXsZ`RdftC0_!S zgIa*v0eTR>Xfq5nNIuAxz8$ta#YAF ztw#=BRQO=ZgoGPqjfgDQeCeg*xi(eBetsu3&lxi8{K`% z2xcW>HI7lsU$!eKqphK%q 
zk8%%pVmDQnu#iOmO;8%R8IG7$S+}pxgH-^6eQ(Mpz`;Oi39-3*l;10{>UN}-@qW|X z6Yr{4y=X}7+?~zYwZNv(l~xuS3P+3&EQoZPET|IreOMOJ4g04o8UUh-vaWJU!p)>u zLE*B-%)H^;H3{!!S&6_a{S>RS-!H+OroV94pkSN%-#-=H1rrh1Z4x!sNinHMHPb$D zK0i)&&2MF+Hn;h6hEPZWSQ+tQ#RN2YAoX6^-F^455t)Z{fUg+kkDyjCT(4XMWh4C9 z(6x0hF#fcrt`q`r!0*V?X?C)jw)ZilELm7oLut^Z8J6G@;_OXrOktnUxZXoLU5m>r zC(zm0STNt5h3tW&<(%DaO3}O%`9*}z;n&7aDF!UrRHTE20wajOi(jz+D|nTDYQ_b? zGzb;Zj%3y4E2}H8qD81ZZXxnRAopg-BUg%gC-PFz2C#c{md}DBuxp{hBJki*`}#SD z?xEx#nDZQZsY|&%!Jg}vF<}w~PMa*p=-Q_`7})CoG*R#J(io6mSxMaKTG-Ia7v@E9 z{78~IJ*E%T@ZQGl9>SG17KiS=-Mw70CcITJ{;!Ja{>Ty@bFqk@$f-1>26Jlr0%JuB zmFNJ$6WnfNiQ%LU?`*B(I>($hz{TnL)BucjjD4Heo}z|}gfy%9ZX$hNCW>Ut-iJOhiui~~HuBa3S2)KP#m|Ldn z{d@YU2mU-KZt9g^yobHxUK##sJqatLt+mX3z-56H>}v_Uf7ifFMETg*WPbv9$7-(_ z$H2ptD4eu;&_)Fl?DVjuRK2*gdt86^n6hm6ZGj2uBZ~M!;+1LoTT}w=EJL8$`Tzll z;U}mVP2qR#JjITYlN|mohBYJ;BzWC+~tl;@^%lGekLrNE1 zSHXZWe>38z17%ys(~`6a7KQ^L9-+CG?l;3nGLh-ABxE0Ji5}!}sy6HdzQz~d)SuxQ zI04^HH9o)B=Hm*d`jS0fN!NBFL{*Zo!|5jF*SDbcFC*jix4U4d#r_9zh14>b#_J`w z>hpo@vw5ieQwsD%zk?C^0<0|@)mn20%W{crmai+NV~u&gJe^jJ0yN$%8SJ42`xZD< zG_N?3LzW$>(%NE#U&GRUFHI-qo!H}*$V8QJi&{6$Pp@uW;GnUHrgH9?E-Uu)aFjY4 zv(|2(EVc;znJVA7&1+oX1vA|dvUWZw{js?8WR~TxCX^EXzjTdGedbKW?W5MiDwtVc zL$%-tU{LQi`u$T@>j)SK;}?dqLdq^|A93SKUBr)?s4O1#22nU_U zgkwjJ9H%&?h1S_#x7D`Jr?7?R4Spxku$(0~lTar&lOf@89^-8F72Pf9(O3&C%74F6 zF6E;fDFivP9I5=X>FOpi#}IvdT4N!i5&aWT{Ch@&9#%)Qd=J=7CCse+#QmgvdyFm+ zm%}Y=FD)L~2OUg_>1dt?94Lz{PNngzCQj=fy=pH|!5_cR6@{MUTV=G|K^Nr}YWU!l zXRk6SE!L(=h}zRgCqKDH+JDcHQhg81TT{hCFy%n^R`LB~TK4JKsz$lfij+7QFW@_& zlyiwd7OXfG(s3-BTG;D)pJ-F!~FdGg$RO_e%&I~TN+?J#kqjNS-D#4xHY-E>W3`vVg9q8hRbfJ|3xGQ ztNI7>BzC%4&Nh>(-Lw!|px0rIJ2*Kr=j}PJ=kTfL0zEs^(Lbm+Q?!?oOgW*eXyn9Oxm_eds<`ATo@TUPz_JAO5?EYE$&%Of`?;lXbgavC`C$?}_W z|9N9etipbyletlAL@d%~jGoQ~P0Y9z>r`tZv#e=UlO#W^Ka7Q7U_|xMicBYk_ zXUlPK=??IrcOOX|A+ZYZv*U`}?P)RV{fd!#9_9^hdPhZyUN~9qgEdcsBl4~x?wWEh z;jAH&nP-v6ec1B-clEi$89w9IGyTWtr}t)*c5NG$rH&da&6-PJ4v9}~3)a$U8zR0) z8P4a=;1HGTTdBZTHm2Qse})?)}D2XPI`1rAUGK;zy5WOmAaBsa;4$x 
z?k~oqZ-gyGtI;7Bp5D?*kw)d9i}ycn?jp)gU}^C`(9jAIsAbRRMUx?z+~nb>=67@D zcAtf8WEbVp^5_Bdtvlt)KMCLK_Gm7;I_gAJ#f$S#M+3)G)IYf1P#LZ+^4+~R&yScb zgB~$w{Yl(r+#jP>6@TN4Zv@qffVi~60~1&Ph!M}I;14idCvA-io_TJ&iN}$APO3;B zj!PfR=ko>5JJh&Uo;_CJVP7|>WU8yU@Fz$jaPE3a_a>8U>zD=;sE?wvMSBG=-jHZ- zk#m>b0Ipf_rmCA=y926Ynt6^#jJGy43!X%%L3tDSsJG}9W8V9kU@{W8gO1aB%BjGs zzN0#g*ppbDvytQv8Rh$z^@Jr^H!OQelp-dMSB*Bw(v!%=$JpppQTtoVy&^Ej!aU42 zXI?_n+4fNu3mOvj5KdF|E2nrcE04$7eSr0@6pGzwD?-Da#q5a^Btb;^987eXS9Jx& zF18gx?UN=Qec5}KEIt>T>%4PWLrN+%muHH&M!Bz_P@IJxtIQu11oW)%9TbSxr<^ZXnBF-I5M z8w#=VK3lu6;-P-ADLH{Uiv?ge{L=X1Lyq2AriV}|JV6nL%4(1=N$3+OsTQ(061lD$ zBtA7lk6lUJ^n^G+`MDO8L_^J?_K(k`Khj8fdTZB&Rb%KOkef1HqoyB*$x@K;z2^B~*lzaRz#z_Ucsixd`N%#N7Mz zaNYe(xt<)t;XB%yo4Hnm4s9O4n<^up`0svsnVDXabRadgC~Tb{FTCz)VLzX5tsYUZ zCGR3S1%x1f$>L4nz~>3M29*n20x-xZi-}=*kpOYf%!>UH%y$)amX*kTXdilbz)U1 zHb)YV8QWRThJ+WSx-HmY=MqQGMj>WI#zV<;pY69>DH<5}6lErUH!u1z>@&9Mh&F*8 zVz9!@y9EWQz+i)-Z?&B61>ew~)`tQ8C9c8LLi=K#PYd+0O@XJE4!R5Pi%pC7HGLOY9$OrZ*MT{+E@}<1ET%rE$ z)ru8&FU_Pwa&r(O>Pg=>t@{lnur-#9mj7`HR=N_K9~mr;KBXrMk>8HsIk=bq+D1Td zuyUB5oCswUu`$hJr0Gud3rcR)>)GK8Fu^kI+nZS0hwX9Vtf`W!ubhOWOB_CaP>|EW zbGn==ue|Ikdu$A?8~KipJ`vmqj5){qxBxff<@E7w2s+%p7Q0MF9w@*xzKqEn<#D zw8PQ^j`5+(^v6&mRkzc;Ee^B)NyUz07v~AE=~%ubJ``8fKM( zM-T4v3uE7b(w@7%Ge*6d2lOK)d5(Iy-Bf-j)JTk+VDA>NVxCp$*w8j4{9zz_q6*3! 
zIecB>PqY+#d$NEV4HtDS$V!WE&t2SPyj15%LyG_U9yBi-vwD@^y&t{&H57%QKCfXb z4>f#r;j#Bzh)FM4A*UxJ1?~{}p0DUBJ>R5V4*AQp1pOSAr_>j311tJ2Xv-yMF?wd- zfB~G1cS5OsiLAPKQaXoGc+!)dNEQ7pCx#BcU%-8+2K;qeHkRUBvu4$t>!RD6Qh|4| zXLUH&8#{jca0Gs-q&>#SFMz?ify2@#n;b8wMh{~%rx*8}=?CelYMO}Dx7=%EPdLE` zJj9EGmEf<_AYW{xg(Qjn05W%|a?<7N~hZJS=yEshGTeKDu57pBsary}#(K7FK#xH*e) znYH2ny)(Hl%VYS=&ZV6neTHr^&o4+SSk0&4r1fd9Scfg{euqD4p+P-GIlK-sif!tJjHr=q+eUcZ6(}u;=#NdqSdDfyKmKh3`GBar(#LS5{Bf5)n85Nh$uEv1ZU; z%e|rkT5fSgZ>|V5nN#KFUVvgI;7SE5w?|cm8 z&yfPNF9S~OLRRACP;cpZ^uU77lLu_&=b1}`T9C{*Z^Gd1mOn{+&uF;~NKjh4!7j?P zuGABVG!2URt4F|vJppKC@v2-L6S5rDdHa{Di&Dlj#hTc!(czjyl5h!W{rT%<{hp~q zk0%24hw;&SeJCR2B)6!|Ri5d@X)4CtNL!3dHIF>8=%8u*DQLU%ClUMN1L^|E0-eb6 z)tWnwHBR5*KA8UJigwoua35f)QG&KCBdviq7q#~R0rVALC6X0Z--9xTdP0Lb86;1i z!VTa>XK^fzqd6c;R3a+>aG|m?Jo&h6^4gAM_eug^*QwO573@RjU3gH*v30zmf;<%> zCHic~DC^ffgFA;17}{>wzH?MN!dz?ik}B8)VGUiAL$+WFUg@U`oM)uPvWuTs6l*%2 zt^;p8^}~rYAl`GGwUV$z+nRspaR_`ScE$e6VN=H^+?e8AjsZfE-u95aPU?C0I`iU_ zK-LUipGQ3>Z*3D)z3ySoeEE7DHdiBQJudM#8qx0eqpa)w6~1oth{N~Qh=#}yery{v zj7N#7xnBBfJMQ_7$=OK(ohIbQM7wVp3~*QHhEC1bxW!o#C}6$rZJgaPhP#1hK>qMC z=jp0B#~Aa;F_w$N=Zc`kbu zs(Q`v=YYE@5XCJ$J;XwTJKSe^HBWUfTeH|e8TO$WiS<`mHyQ4EQ%mF)X-Wg+FM5Qd zx3>Py%P~b^v_}=sWKWHh3Wm*o`4|4Po8P3RIT!o1mmd(Rc=>{dl>SLVU zP7^k`EO1ILG-c24nzd|fXCzW3>kk(clesw@II9?j-HyHcoNZ0JIq!E4fMvLc{mUf=@BA5*`II0x;#D6nVfK1>5$VoQ35^OlQU2hH( z*yH>%`)VKsa|<3KN$bh_5b^P~Y*Q0cu6Kca$!bEq0lJ#qGYX=PpRw5BD2cea7rE-g;Z64k z6YNx?H<}(=jGzYcNTG>#EX?lL6Ck(pNXz2cdhA-uk?jldhcA`tEIqx?%3S)nU)fuu z_&2Qq@%ghv^_q+kP+rs0GnHxt(7NWVrJop|;rBils$VEYRTmM^DgiX}S=s@THD&nB0YcE2}i|#Qd-*ooF?lgyKf1 zQph$PE41nuHK;*1`++iK@460fLJ(f-^Naz$714+bYK_Jqo-P9aGC``)4 zES=bY#eG7UEnJD*?PT;CauEJtG~EOIq2BuRs5R4dRr9^m5K9l1T`3*dTDP)6*+IP)QEe}; zR|I9j*V1}EUw$G$^(A44=40D(O*p;niz?01A^03@6?d>nXe$hzFK5<>Vb7G#uK(G> zGVDQb8rUQ4IV<6D-o}we;8q-kD+K~FW~p%5UEAZl6h|=ACY^lM*PAy$Co^$@L2a=n z$XLQgcRt?)X-NrqbwW?uJ|6lf64@gfv-i53&1~$`GD|=q`gXym-08UA^pw|VM^NopF8C$RoHrU#U&Dr@z3XUKL!F5d9!8c5FIJh6ZBwAwm!$lc@Zl-4d7z~4>70z*CUZpm8;#)^5_TSKAIL+{5J 
ze3*hmBuAHpuifPik{U z6aa{-kMIu!+o|WbOG^WWk7?&^c`KdwsX#-{6P*da4H~T9kRvWbmMXptZ?BSD3@1IL zfW4UPswOCdORo5rR^bibrxA4l6IX_L4UR23S?Zp#%+xtj$;aOza80QBKr1ADb1V2E zB!-%D$|Jd-`#I3aT+%@Goeu`x!oztMz((*b+EWC6B^(}V5UyBVjS^taP`DF2V zMo`E|EHshbSI_j1pq)Ysqt;pXLcgw8P$j9@#tLtOY%FHx-`?1O@@p*fY~hs?-$QX< zK=*u;xltE%P}B+r#0g?|D6q1AgV34ZySUuI%A>2F75C304N(R|>LvmhclHqKfa zRZb@&BDx*az6kR)ONp%uJ+iwf`1N7yN=?g+wZ*mg9f?4G^&PV3t+_syp0&6FFhGnX zt}=IY^VRn(O!v1cUVWPRNm|UJF+Z4?!jF@}QLzu|XcrH+Xn8pS`e|JQV_z@eVsKAU zr1!Aby5Q^lUeNdPG^ckRtINHn-~@3P``c{{7h)!zdLsl-2p**E!E0 zHK*qilwUluJp&vmf2pG-&7=&7^S)K|eLfNa*?pM<#&ALzf7$mf*$`~ze26-R@=~}y zfP=yxz}W%`i)&}`()~rxm?Un7)h`EdKN5Hm+q)2NCgX zI+x?E$b46Z&pmI7TQh^(M!9t+7z!qXcfau7ht6?^zyO+Z<1y+u>8*mnO@WWWSSECL zKlJj%0aOnv;zW#chK3*UABN9-lIpNuejZzwhXgyW3I}72k9LKBwABCWCj2ukVr=ux zZ8dbcTs%=MWR)DNadwh3i{=zzSDe4LI2cpTJrpG@|L2Id{9Zup?FHs4eiF~E{yQpP zKfePUWBR`SId>LEKO3x>`TQJuw_Y;K?rZ~xMW?j2-Dox$HrMuyDYl#m#0~Vs=b?|_ zI$f^s3astM&odO$A1A(pBH)$gLdoQMgiG9F=wbZ-L=ss*JrV6pE*XH68|+$cpCHyu zabT;JgTSw-@VayC=d!g($MX&HKxT>@N&Gt@&zJcgQuSkY?6svjlJyqsT z2+2|{GCC{(W45#HLnTpD_TRO|u0h1pSmO&P8I($%`L!AC%wiY~@Bq7}{@&+9INYY+ z@U~^~rtzeVghjBfj=#Z*`W%3ZN0B;_45nY@nNx*M&hgr=764~NXX$q2cT7v}^Odj0w;wlh90ti;{}RD9 zPETIddScIyG{CWQAApHjsVMt8*ju*mq>`AVOtFeByXYc~?;Y_#k@2D24RP&NfXsHtGAV3U@eOBM2kPZXZmGJE_qmnf` zw#0EO+waQ~eO5W$G*1d{)XX~d*%hUU2~g+8QB_Fu51%OeTDi-89MYakAvgtEsv}u9 zSm$h$`G)?ci_E{1bydAu#By}}EP^H;HD0-<+0C);-pyE3kyNfJ88uBFjNwBXfKS6IIhOq1@AZwwRw8VGpsAGT#= zUjAxXYeetm1wiq;i~&OO^a+!{LIu>%H_rlmhDxXn;8O&CiCv7UTjyIdG`G%ell6+B z|AljE@$A^7$1=Xuq~`yg;L#FWQeZTOX1=9s80RLOtj=#p;M-LfcgDZbL{kT@J9B$# zhR%~P3-RongQ_5kXziRjLE9}=YZ|d?1|q4z>a3d;U9h%x_O!+xLKUcd1PHP>z3@Fd zolV&Pk;%aCFHquj&7$FAoDHg)u6Ow4!*TYjz7md)04h9SG1ujq~lc4lvaZ; zehZh6`~KYsG*;PP25_#ZI!ANkVwZvLn#>UU<;E=1k4*;h4h}C|+@by*HtuedwcyVe zpm5b0Oi#d%zWSc{1_My!?4tltzzu!#;T_iCQ!2rhHe!$3{Ky9(SlzuaE&_Ieo?Nz8 zGk7L{NJhGE-B3bv8G|B~Bj2LN?vW4;BY{-41HZvIFU{%=RZBu_u`E?^<*_WilECDz zNhXlZe_qHXj>QT4L&?JZXWNi!X=(EM7~4q_^lI^}_fHFWe=Z zuyEp%=;0 
zh3+)kF#t+LoTz17)PJ;?KfU_4aVh_X~AO5BIj7}#omTRr->Xu6s2E1PTBTl-Qfdh{K`G(z^|S?65x;q19X2`yWI&OY5y@2pFguZf||`xJIOVkg#Qh*g%XsJ_i{-k z?yJG*=N@R6Cxz9sBJ-RR1|D>9y^>>HiE$eiA7~2ejI%A>oXB$sb}!@(|` zYowJQ)YQ$#S6wzaC5&tJULQ-$d3&~5fW9yxD6dX%i1;mmDzDBR7ExSiUIg_9 zOGd)Q_{@a2kwE5~dGl-$2iMDuw37@86GV4N1TAl7QQLR4dc{>QTvVwm2$PzL{1JX{ ztz9hk0?B|}L9Nh=&_f8E4J-|9PeTHJcs*=OF3^^#Y@jb;SO9Ia($9VSw64%PE|NX% zv8St}0hg4ddzjC5m`(@H&3=yh5Dl)=*ZQfb;=zmF07Ue|?fUB_ad9%WuTt1@3Dp9>?L=)M%ixNtOjpuR(43+yXdLapx=LsDoHcV zGYYZQMH8Ofo%D>wyqsn#HJbQ#N$z~YWrkOYeCV|GHL=4Mz^&?X0_ok;p>h<^}d*4(zj< zmeOe=<=t!dnNuaKkqM}fJddIBhf?MUXz+O(=j1BRz+@knn~Ql#0s=WCUJ}(a1~Nr% zq;T+smsPVvqaH-JJl^o&8Qg;UEvf)gNyE|u#5@u8V3WgF6>axATw@ixtN_4P9bbX; ztTTbqA-ted8+9oue+AZHmTQcPrfjvUCwgh~1hO=L?sqy(^p@nOy3FTQe-!>9`_I*+ z>iWw1vJ8`ZCRxHAG+33YKN~3j6jYgYiK$BX)oP#=q+<(#Z!A{k8*1{2;&O0ygb3qw zD6#cMpkmd#RW8mD))etD&DIwwmp%Jt7wK@s~Vak zL1BA8*3$AIOz|LIX>5lgxztrpkfcnEsr}nd!n-8cylp@J}mrJN4IxRs`UQK*}3qNaLW!ljS*|wh6{TSZXO3% zwqO?O1^}h8TodOWWdqEWd69aLc6u?NyIX2?l=(fQ?Ee(`kNJ4ttB>D25&Sun(r|Le zF06*;321%pwdDVxTm#}`i@!s~m zAw8MEo2Xp|f2uMfkz*z(Xcb1an$))6%pUPHx+*wlZALekdBGdk(%4;VQnZ=IHIyR! 
z(yF8Jr`TLo+Oll%(#Zfz{cnNH>($M#;tCQUM(~hZF8+QidP9y~siZ9K+m}gPfM`YA zx!}^`#{Zn}ZuW2fV8uCQU{19*U!QV@+!7yoBW+q6&IHf)AIyK2P@7(AHHN*gIotJ> zkX@hYGVeJz`sm$dBi(;N&6zTESHlgP_pxh{RLQ3oWyBX&prGdnJFnSypCw4e%aBrU zd9_V3!*i7io{$?ZUnS^df9M!>yQSFA89B=?lW{#p4r!7{LIE(;6xcIExwaEQH$gU^7X_60fYcrD`}ZU52R=D%isHlBUtmpq}&9cNO9CHrwY~RqeGqDRgF1F zNz3I5Pgt@nW2fe#5`&WPt+o+VL6_|arxKH@O|Tf6he7f1P&2;RXzcR@8;Z>^Z)j#C_yo5q# zPnI(t`XNshIhIH2WFh%e%Nb5<4Xb^W!d!0Nah#WGO8)N=88TU@@q23+qZ{O^+Grl@~Q^oztx+0{v z#iw~Bo4f!sdwREOi%}0krnqcaka|7Gb${}2__P1{j4ou>Xfs!kft(1*r?D{H%sph_ zA_pEl^C@vAivW}pfZ5I@RGjwc0zB?>wFf`g6x=8!JZb&ufPja;&00zB8MW2MjY=^> zyaOwK$F$Td&=_|y@;vy?ddL^b+vlYF@5qaT2hq#J7xpe>q^l+1F|eqiO?v13(6SCL zxY1lT_Js~8J{t0!L`vpVc1*y)hvN%b7B@6KoTFg;K-bXISxBPc-joK6`Nh4sS?5N2 z@uHbk#_ssaXD2=7&)74^X@UiH+HN5opqd?;&|ta^{rCvgW~v4b!sVY$50C}5nmCQI zWGFS|HoP10=m+)cA=?4}V8ZdFa;(c?=0J@Df>x#>mhOtLgbXEYw9yr;ovnUe8Fv{N z{=!KB=n>(wH@ic%wVNVmWY-+buO)1SEXTXvmuax1k(F!JWR`?nU(Fv`Hb4nPRBhjR|XKDQ<(X{9?PR$ax|x;W~`?N@G~ z1o;fAo_#3Cea8jQ&7}QY@X9yAaiAW}cV-3p%-p@R5l|h%Y?#ke#n{ch9zqY8c5Qos zRcCrP3LvG?m3qFr(shG!LmJ+kwX!ZUjiUHx8%qJd6?)>`=D`C|y2?pzB_Pvfgkq=r+|aNm;qziZK#NIlqo;eNnCjeHnF^1+v<$ruUa{@`%_u(E|#6RCxHyU z{2Pl|D}hvj9q~s;bcQ#wTqOO@qc5S=N%njhA@BbARI@WacxN$-Pwy~LZRL}xeGQa(uP`xU&hMr&QV zj1De8g%?=aQ_XY^?Had_TE##Z91kHjwSeSM0?v_Yuy&>;n%#bL=yMR8T_fG3bYqI| z>Ms^T_MR+(KY3YrtBsS1%A$w^^lIZ%YKyQ(Hp|&M82>+b_+iRvXS{22 zH_Mddv$e~BH>AwEgLg3u#oiT4n>Er{!IXhsqamzt#~n|GXa6FPoQJ!^)mOuIP({`HZ^ z#u9@FJb|`l{lD2izs(Pky=|~n5*%U(yer8~U|V@ScOM*X!AQmUIKNG}u7vAH0_g;F z|2%&|uvpkew%d=}uA2e1Q=S5`z-8XsJYU;$5O1jZgbmyhf2z1 z5TO=)mR9uSmqVWf&j2efC70wgGNog^wBp@vvVO)_eZxY2r1X_sob=SlG$+vTFUkO4 zhTWbmkMuLYBlLy}gM8ZX`o$Q!qkQaz+NP0}Yc2j#jq97*+@9C+#L5Jl%+c(ins!(k zG6}OQnks~Co0bNrFM!u6Ml@)4*ZM$+oN6cfbMK#xMI^F^_jyqJvTR0~#nfxM%BU7j zb&*c_V?h%a{V2`vdKJKrqh4b|LvIRD7C3uckVA$b_mRqGJBPd$5(Yy3vV?30+l3Vi z3HNC!0;;M6)0OfDqc!~kP5yBnZm>Fg)PMEefBlB}!3ch7irEj31i9Cq3UkHzGi}&- zNG)9zspdeAsBK2tFwagP5z#Ht7R%Wj z@IjKE1*@oZ>!!|}D#*ENP#@%nF_)LCgs(Apwz;U)X{w|@ISc_a;#_Q-&2E_YRY~Hi 
z7P}k33Lc-|ve=cL8m2qSoP9lq%B8_+|HUZRh?HZHL!~jbXm(7cuyilvn}Xi0H0;$E zNM)!O=uFFH32mq^`=#ngkf`2z)xKg%9HV{_45Fgn-pUsgtP@p^+elir1&jWXL6@yT z6w3D0PqA<23h{Zmf2pDzW=tTo@^Al7FH0E~wn3QbFi@9PyA$g^)^n<;&Og$6iR0M^ zsT7!Pa+i(5de^TT83xY=8|k^7S%-ca!w2j-olKx(S1q5;-QGGsZ5199A8qb%w5#iSGc2453RxaM4LY@H6uixThQK?L1kapCU!-7}yi>Uvn>b#?y2 zpQ_*sZ)=U6xa!B(2U`@A5Hf|A=Q}{kqjWT75Hx{PE#i%LzDc6{SW%y1OSFL6H{gOH zB{+vp{@E<+Y4M(xTXAbV3E7taMq4S# z@!AQGt0X=7`c`&5x%pu-C{!XZFuUvO=vjkH49?w%XmeRn_u!ef%zg2HCE0HYhphIN zVBu{OS4~%Gq=GLdK>>d82K>;>$-iNOF6Oo9%^=FN?m{%edVTuTrjOI%+IQeL?#{<^ zv$N67Fi@v|j1Y5eKXX;t_9=4d<~6~zn+wlfkp_4E5L3P%-;pDYm;Cug_uY<23>k_X z(r6)nSO@?3Mk(uPj4ON-?Vm`ZFYgyHzqO1DS9PFqRpd;BFzV!&qI=7 zW6JSI#Q`DqXOhn-T#O%HdNn*Bd|oF&O02M8e~==Xf>f9{sxl`_h%4V=rrT_bs?_ORTNnnS;)#3?d(_GPGajr}BZKnYb2<3&9&&cWe`IGg;P|vw$2h(TR#stD}#^jpWlWQ^=jGjkdcujeF>u)A)%8`~tFURAC3wxsD<>2RE~GMV@GF;(}sTyPY}wTaMD_@Ho?>-zpNt5Vxf z7@c%KMawjY~df4F^M{ zhc+a1fot3ZXhWL*r(L?QD;Ir)e!6~;sw$o*&c@Yk;r>*jjcOkPvEOxW@BU%1P22{b=^?Yz? z-&V{#_zfux$Jq5DM5CHr2=Nlj`=AT>{P)0xw-KB-f{1o$#i4QrfV(oqmwXQL<^g;>)t{HoHu|ipdNznp5rKnMK}b%&?r` zb-U`13}ST~9e6B@=H9sVXE07Cl_1xm=?io_4_@M5KGAMBGx++c%DreUd7|k3Or$cf zmUCZ;l=FCE8=RT0f7;^E*5ca)LLe{jfsrvRUgEN9_)7SlP zjxE_M9?j>+i1VC=z&YpG`jyN4<A!|dQF3I3vWvCugaU^vpV-#WU8N?d_#C& z`bnj6BOvbSIz_9N(EFLpQ6M4huV($ZGqXJ8ZNII*X4!z$kC;uezjC}kRW0(ULujsB z8=-hA^&FHB2;9U;ij;z@d3yZ$;?Q#WEjCw^ypT0{;8I#yfxhy}-7yMPh`^R>)$Kf9khIQE>3OCM5(dpu(=J$KU3>|cFa0u~qi{N|d_f%X z>S|h=!LJ*-pMFPIVQAM`<6H2T++s9>$X2 zS!IJ%=BQE9LUH`H^&3H)SZ+^aW5l0h_=va2vAsv;U*iSZIOaLsIYbvv2W~g#Y??o#c}PNLu6CuEA90-iI_q1>8#;WCx8E&u9@qg1!=(;(OfjJ zgG1oIwGUqeP+j|!r@WT)#royApqT)Uc%@CL1KL$|`r^}EX4rD ztU=iE6LTKO{0{xi*giQff`l!Yc(FI!?>h5E8~m#4&E|r z=fE^nuVtMwH~j^;^3Xvq7raQow&PRKWM;A!Y&U&};_SJ-hJKu78esO1wrcVkLO>Sg z0CPI;d;T-!iQxO-Fp?{XCgo1I)&)_xVzh=#5DUii&vsnpEVB|cSaF{!pNm?)p#JIU*T`=(uB!nRuNS;^8j8et$5ju6-|hL>`*MO^ch+F%egwUOo_47~X9 zwu$l*3G32*tp#4-&l;{3vGr1L@fdSD5e?{9krlooBFx@Vvc}G)ESuW-{Xf3m1FDH{ zTO3Y6KtXyJ5T%MVkzN8Sh$6jrL=X_^BE1Z#2vLfF0!ovjNGJj6J!$}v5)nd&ASi** 
zrGyspAMkhYd+&aCeOXyqvsPwyKl|*x&z^H+j||dX8j{2e&x`#?mwd8$91MjeFb}-# zquDO4g~)r{ezea%_;NhTUUn>yD$>kZB1P^cT}_B3bIynB97iud)wa~)r}HHA>ptt&FuYLy}ck! zc6^GPtWWP<7bodIeOf=uc-8Yx(=;(<1k3;AxHBLyYj=De7p(B`ni@zxcgLUOm@0Si={_ecx99U-0LjR$@Hc%20|9uXTt8!_kpb+#FUnK zGRMr;u;p!~FQ4tE*`_<;VM8>D)UC$4T1RvX$vac{-qarNK;vigXpQ8&fL$$H#SYbx z7I^Kug{jqsxly^I7P#{2c$_k_l=8Q#kvP7CM6 z^Q1kJfS=>aTQqE+`P4^^M;nc2#!h^sBQ6=LVV-_D0}lL)o!<0`q|2kLvx^T^*VRM4 z1eCEWT6W!(x=dj@4$vGxSUnX-Wv3~`r?V{E@Op`8f8E~VYOeWfO1pl5?-rUQ;;1R` zdTd7PPwRr#W#oR`NTZ%My@)9!g<^-06{D0a9qJOTdrmN+wVHzsa3 zxfqRcVo6r}!yvH!D`JQD707*K`!R8h;jf2MtcTo3v$nl~+ly`KOBApin6atI3;TLE z#+i-CI*|vj26;qSsaC_<&F0vtqi=H6QdCN5l353aNdt!|j?5ETgMg%_5cYO48IaZt z$J`d7wV%=imO8Vt2k>%lNRw;9RXmkN_rUdu@{>z1@3rN(N0#@wYrObrTraw+yhZ&+IcNa!ysPi$ zhQbUi{G#&5BjvmA)N}7#0ky9sHQ^@C6@=Fs2R!pn`sNI(zmw5wow2|RnPr~Ga*t=m*Zm&Bp@G?X>2@ZP})iJ7(JB<8aM8Ndeyh zz}%Z+D!IRw)>y6NSz*?fNq3ZAAtNS&y*}kYbaa`^K{?;=z^erD}1IpFF*^id>qaYPX~{C!O?&*CR{zxa zBHaxNC(_1zd~FQ4FC-2j&8zbx+?C~es9Ec4EojpzeVQLp`mA2$55Y_Lg;oH=?GhYw zLFJ+05%k8sK$JFoTb4jusLjdRWe|IrBUr=tR+n?+i@a1MLt-&I`5f)%($xOLJJYH! zZ8u#TSoY7XU9QC4d#JrXr`1>F9UwL8rmRgV3ZWU~Ogz$g@wH;Rlg`mZ<6h-=k6FAF z2gc48^7xMt+dso5r)2~H#m#ZqF%4)q>kd+jzfJIg;ms5|fxD7tZ<-*jUM*d_KzWEv zyz=yg&4>f{NZunEQ~no4P|Laz%&zis?!C&~pKp^sP-GG_aEu2PUn=`_swE-3xI1L? 
z28AM$eT|mPfgV+Bnm>jDbvvr2gz?6)KDE`Z?|omr@l}1TzYrLO*EVY@>o@UaWZlDv z(-ydTUGuE?b~$tCIrgQbJidjh!kfzZY{tE?H$$l^TR*aho-;2j9HNUbDG`oFEz3>< zemBybqp2289oxz=#}&_`tl`_UaMHn~P>m(D5omklW;mGOEwIgbcUPdoOuzCbm~$nF zZp$*b0JK<6(5j2moN}X(Wm)bt>2unOlD{cBc|ccLna3VNx3Lcis~fR2_W4jx{^ZKqbqU$kuzG3w z2}8%O_?l5Z>53zfVb901qlA+=%cE>ESmC$&bq4f3N`pD3I-~rQ|)tV$z~>)Qu|xDK`IAzq~V@&D0D3x z_VXZi0Df)WI#E2Nxh#s0b-9R@rrgF)c;T@%zn}w;Il{j$?2Mn$;!4(jqUc=9yH6re zH(_uO>O_PXM_ilqVZu>R2!6r*cR4g#=|-vx<+QkbJ4Nyb0iRw$P-~85DNfv_Jpmb+ z>tG^`v77NUOmm5QQXRleC1EuioFcUKh6sJ`T5Ye}SYIThb00GtUBT+Ni^8r@8#2`Gv=BK-(yX zq(%VMDcXsFnHK*jf7WGP={164Gltg8lNH0|VR%09|TKjbyw8`x3U+He`vo4?| zzyPQCeB$*u<&T(Z-&M5M_{z)79T&c71k3*8hrplnaWjK1d3q1nmF9~Q;EJ>Sc+fZ{Q(b{Q_S3qu9hK@%#(TD?l?KrXX|^qWSm$! zrIzB4V~i$R@+Urh&Ht+!$hI;q9z&o4Q19jAg^!gDN>V$gSw6{!3C>wkd&U=5VGQ&| zV3_d+AD65xu8_9#Na!3f|F|j#>$DTCRx`nw5YAU4x!UEq6iAUVMvB9dr%GwmeFZr+ z7GPZ7pN~q0Zx4oA(kd`@bUt0&hL}?!Fom8!mY#BIr%OA>=47c zHThi8Db=zG?Y4sG$*49Ks@5QHK(;(^GV7i#)6V@^lC&<{y+$Uhyf3X!|L9IfYwf3? zi4VGIJE((UYtBx{Lj-%maUF!VO~77@$mUp?(lN$2qR}6Del3`X^zM(CW|pFK$@e~5 zt#I2{gSVv@C?iwAvH$(6xRiv>j|yin>T9_dk&kL`RVGfJ$TE*m(g8$M)aCpy@4~rk z{n!@1T(e|;{%(1PZ{bO#IMj_|c5F$}MRwX|hBEkdm!a(zi{FcdysZ7!8|#Et4XU&H z)zZ2`ZKW4aP2NJ!MU_OekMA@#e>S{LT`S2o5JeS*Wa1I}!%#>s8pOh5`bR>St#nVs zA~fyDpc5npqMiL$b^%@){N-A3e`3~5-4!T<`?`ns&FYru8Gp^k2_h+x6AYfF*MP^_ zhP>~)KGJ9NUyD|{`B2fg;(EK+sfm-#!uVGIXb%LV=t7(S7(iY6<0n@^)N1OKS%8Qy zkV!f`87$M^%Dm?B%^sM9?x^|(!y{^E#T*%bv=3-gw!O=G91WaN_7<@};VrkWqV_g_ zom248U{Z4z5d4?Hn5@<(TeT$k2uJi8%jKu6z}dgomB?>O!ozq*vg@U64aaK89~Wry>B9^K{EU22~ycr3Gheoo4?At;f` zm2G~X2UVCAHf6w;rp&X$RiS{?7ii|KIKQ5ylCAXRAvczDiTg!R?cAklN~%5kl$}xa z)8Wmi;W}_je`|qC@3djohWu|?RW_VERLO<}Q$Q|t{CI4H_RgZM4<*Ezl^IleYd%W( zo>{=W5Un1?pAg``m1YhI_nrCXiMf)I%iQphXCnOesUvJJLMN)?5~!~?%NDJwJNcuF#C+Dn zHD~1a32$ZI*Nt|{b5}ChrA95&p%ti=n1U}Akv|Pzqv~)UGju1d!>)N!6bo3dUvO0Y zfy`Ea1|Hb0(T~f{rglh0CX={Yv4EqU<}PfsQhnkMu6Wp46sY>9khrvhLGIugB1Ks9utBKxF8BGlr{U<}XcIBv5(lqGf#T4e{4Cs3o+7TIzvT 
znMBW@9(7%Cf0hazn#vaXdU3b-Ps1OnI+Rq3Fb$~m3)`V%96KI7>um)nfJ?vr*0-R%eXLPVaO*LZgVhfX+h-P%|jJEBB8c zW@qG&!ooElJl-dHJind!CYE&bQ?P;{?QqdqqS-SI(Wj4nQVt`miUu2vn|RLB@TSkp z*1VQ=p`S5wpYoV1dTALS2~g!<8F{{uP)tO z<_@<~D0OTeV43B!t-zG9L|DAu^sT6g5`!yIm-CxWDaWu!vBF!Z9(M+t(%lZzGsvnl z3Wxb$b?V*R&61%mAE!;+sLP5Jg{rd62gxnx!o)oIO2&Dw?E!vB3ulR9{L(7z_HMsc zE+9UFXF;*TBWXYe-_a(@w6(({ZW;C|c7)v`+)in{&c1IUcUkh9&@S%j1LC73XsiC7 z&q2~HUc(f*kNQIUt6l6`Q?DD5ug2qmVXXtoVSiK*ysfLib}5KifZX09xe3T-|L&?+ z>nDoRZNDA^+OjXAQZGM!n3%6F(AKT!?vr?**~wc*y6%Vk!|j~=A~W*&F8-ly!?wf@*STlp_g6wVUY4w^Bd`v)f3 zZt_Q-WGg4=1t|odPd$6)){(FajU_0Jy>ZS@3eT95I{6Y~ za)f0V1n|itM=v^4@qZa+jW}#|%+>vkwt3iBz`FdaX<0C5^=R1L+T{6&i_*{uH~QL? zCcpxeK|N1{i#e}81n*fp8E?@3ph=u-o1>s%57#pX-z9xIQvc%FF4Z)ty6!~-#X>$k zr6o|5c?#owCmtUs^ntE+l=u`K+f=N%IeHg-?HOH?${Imr=AjLg7$IZ)_s-UPpLbxq zE@I3fm}8mFgnXVi)>a+()lF>r(sT~TUN-qmVoaDeCGfVTbC*k|-?Wfv z)Ulg|s=#1jwdz%6x75VOgpd7ecKmf0X@j>tzz#m_#&Jb+7^ zSu4|va0(fufd>XzK-WR_DKwzNvV921=#3J)6)mvZ~&{J=DLtr*03JiUG>v; z*TJ{Ohp*wPqWHh>jiwqJFD=1i!Vrpfw8SgT-XBv^8Cr8F6N^UdhAvF^qBr*6#UDye z2)Y+m`X<@boIpIlq=@7h%Kg0Cx0h(%NKpcF<{5C5* z>#`_||GjMi8EP0Xi^)H6}5A{x1xC>6@olgG@W76@fQYSOEo<08uSDHER0h>7( zE|AblZ@nckKh^bU*Is4UpRS3R(N3vt>vM<9r3HBtIu~B2qeFo<40DaqBlKL4o z8zrvQ#_098mjJkGHqNzp%86mm(J9-}-68C(r<{ypJiHX!&KLfqT z$OX1;XF(m4VR4&N#wmzx>gjgsj%CW$o4i~E4h(Ay@I*>TwF_sWbu|gY4Cd<+X?;F& z74*hYvrLZbS(U5#HClyYu`KYtp_>y`r;VfVC!GGw3QPwI$jlEOhH(ffD0tt}qh7k< zd2QYzh(*TXR$y{QOw>kHHe4q$koT%5;;U|AgR*hY@JX8M;RyYpS}X49W?;D86AOs`ojJ33K%S$Zayo+`B6}Y_`SFR z4K2_RTCwKcIknzE;+>J*r$k35ijk6Lx6t?vuQ=txce-(9=B9BCKL-3)#>>c|Ss7o# zGcvkg8>l^%hwpyk+b09av6rrM&2FqbNmA~POA?H%G+B|UV0mUH3WJO5y@h9q1-{HF zre$=8b5~x}x%_@0QjC&S@%(IEnUfo(}qc4Xrkj88>6OZ|w7@ zAkh9g+lyNy1_z2Ajp;J2J8lb#=vAiYf2ZytX)c7-eF0KeeAh!UoKOw!L95AY0hA`LR66b+PzysQ+Rc5uoj3xWmA z<)qbIv9UQ}FUUaTBXj*xm!JItE&6NQF3!_gVJBIKp6wBSUG-}TQ% zbT?YE_S4P}skdo050wi>@~d52vVS8pKlM8fN5#VumL4cDq$LdKn2dcx?`~cZTn)&9@JSZNlw0tP9tV6X+%IMLes zeK(aFXE`Vh{{^5Ie@!Un1bep@Oe;yV?&ctrOt&sF54)g-Bq!PglI!NaJU%RA%+UK4 
zKoE+qJ`!B9jfT*J#AcWu#ub%-+C|-ti@2H^TkivNGQdAZ?J3Xp3IA!<>uHeQ+KOck z5i|*L8WR3fr6;-#g-vM82?=(og?AuO_eQp7q<`4Lf)S|M+Gln#> z`7NKYnU`oW@n#+%GwiEh?k64$+s&~rD5c-@4d$$DXv3=KoVWKZ;`3nU4}E@M_kR2K za^$3jZTxb)7XNL@gE)MM>Lqc}AvZCLC-+WGr)9Nnlz)n`9cy{d8NXY};OzbTP3nLH zKCcqE7@l*0Hy;=dr8+yjYtIDY>GD#4iWJ@>2#I;JM;Zo}G6szhAH!Rpt0^KbCvIZM z|0gu;n<>PL%zVfj-oWR9=-RE z)4U!;DP7awN%?TwhRc&~X3Z=)x0}rgmOqKy_jq1upE^G)c%6UkTr86ZQ?V8Q`>n^o zmv+Uj$Yy_!#NU>1D+7==Ke#$Dey6!$AzXjWW#*E1KN1Jzf9X62);Ra#MM+aq=33@Q zN@)Ca;-kiZorS+dv3xPTIM-@5I=GW#X#=4Jo~Ix&QJWw_;##!Y2sV9QS+2Lf)f0#u zjEot#i4VG<%Y@HIBxL(T>wdZxyB__oN3nJb6v2TrJ5Y%uTbfxv>I`HD1cc6NENCny65ofB9d|p4CtOZ zWohB@tkiZnT=-O~eZ6Jep=<0qMRR)SpV%JAw$%BlPuk2>@v6#rdGZ!pg(X1N#ZQbc z98B8AJz6x602h5MMEk1lPFA_aCP5vU^C4ycD-x$pfR%_H88)%`@e)RmQ=V(s$fUt~ zZ@$@)FoWV#cJ2XSbO(>WDsDiQ@!MYVZMSmB?*0?`mKW<*I%zu&x3qHGq(|^3%1O>$ zk4S-psH}SG@?M^2hAD;L61-I)Is}#=h%H5tBHzu=b#VF*Wn5IHw4SA8?jLWOTq zp#zcE*wIOMI_IggdwZo8bH;1-M41=!RZCj0iIn3I&X*HDX(}A$wp$+4;>yHQJ)z5X zwY{xHOzp4L4b6yUrpbjbnJ|@g%&ZzN`*aoDE$deg0S-sH>29+oXc0S-1)GDT^*nu$ z(Wh8{ROwH7`2bp>?Cab++xM>JGru9lJ|y*GoC|mwQ3d;2673%UrB_OjhhbC^jJy?l z=2Q_xL}xT5{1Glmp66D%`b^Y-G1{Z#;xH2xkaM8{XA8%#$_o=S-(}7<8=f%eYH*{> z_k9VpTbx)AC96~@u9@{Z8@>y!)HssT(YNr^Uffo1#8*QjqgpX^64NL=`CIC1g0QQy~)x9lA zd^;y9E*d-LXH((cld^7J4!+Sv>Vvi#vMm>1giiaTBGJ%n!16npcjoa}DC zO`x82J+B1YXGUOzBLjJ$z}vC*4QsSeZ>v`Oul#bsXU#7IuKf0M5=4i6cz@2Q?v0LG zorNgkW_otGoC}9B;7KrYygsdo#vFUz!o~^19_jF`44w}^zMy1K_qCGe0w=EW*4LMy zgS}~{D8iFNpW)T%mb5}L9p5v@Xw(*t03XjF-;sXoQzrK`_0RM7d2AoP$F zI7pyy&KL9G(<9tU&G|B}EHv+Hg(8OoQZcNUnwMp89Kb_o0TxW*x^a)AL|Fhj#6#G6 zV+d?ZwEG_`5YiDGzm9&>S@$8bcUEQE8LWOac_4~d+=9Pd3?H~5R6i1RQ1khtouRy& z;gr5lHq2PwTO?)btWJo|^QsA|@HC~ijp1jVpm$Agx%rDfSag}a@1;;NXAdM%xDOi+ zB+t0reb0=4Fhtd=7t(H`{irjl4(cQNM<9Se?Pxe{KWG{kF15xvBO`$&;{@T&1^{^A zi~;2#|C+gg^(q3Tg7#mGIlpJ8CNm`~+)nT3?d+k_{rZ98dPdN@gDj)vU^b4W2rR- zIOWxDIK9-DQ7ozh?XjcF78XgLL4S~wAzkA97|T0zZgc^4BifKT_T+PdV9NeMG5O-g zCF|FFaVPFj(hxo_X&XxT++U_JE;_;7d1X0Jpdinr8WGVCP}rsP2l{+cH|<+(t39E9(m_Lu`t;dp6DLOUGCO(*VHvk-Jwz-s2h(hq 
z+=7P83j(dO$yF|je=xOby5Vhxf)6V13iAQ#6jl-iWe5QcBJ1_q*3+I;OIOv36ohHp z@vm<-yLwZ3pFfKef2DQtNU0Z{q-5OIIj8Bg5+ASgZ3b~B-}oi*O$?FhqELw^P&nbR z7F2t|ufDFX;ZED1w8~Z%=nuTevHnUZ!PiBoJhd>t_JoCaWt%dpn>0Ll=y7?=`vox0 zZgsg}S$i5z+rdk@1QsyIxvg4akgs(lo;heq)M{8k+Axk>Dp*#Z*&mgxrMBq0mq}|kaTOPq+~PNHGUF4V z$fYlo-S>cT4fDncEWemECQ{5_?&&OEDrO~X@#KXG_|%sJ9EGDDS9=}3y#yY_P@jw zJG&w})Al+eHKg!yWe`PL*S&pJzxhhN=imygFD^ii?7yI;*iv`13^}PQB;_$ojmCB7 zscEYzmQ<^+H{zaVU>vd!8kW-Iz7$ddO~`W$>bJm7j9?zaJ^$4|J3y-;Xjmpe<5X4` z3%mq;^%E+apO2WTzwExCp*&}S&WXu8NF7NZFXxMPksb5Ss|u*O=y|0y#8bbNwOrf7 z<7}ObG<1d?<YT3n)h%>ubeW?(BsAhT+Re&nC%G?NjNB8_IgBKjB z{mN(WIGqhNF@;+Ef=nB7duc!!N`epQ<5qVsqaL7llf=;6MPcx&a}n&FMlmsPJKb(3 zw`&|C4#?C!BxmK@cSmv4O67_5QO^s~P{tuH-5mCtc6GrWf2{ zNZy#c=;YIj1H_++W42K@;$W7=b`1k@5mz`Nz&Z6M1{c1MbX+j)$^$N>sH)HR=WOF& z6gWnN`Xgz2m#oLQcRaMm^#pu6(0iZ3?tw_K)L58|-$L=g^`GiqmUf?G6Yy)-M7w@Q z&{aNkg3dfX{DF?0GDg$y1%ZqL;{#rkA({sDW>#AirWaQ|i~Y;?=iD9Zg_q?#%Tb#nLOh zX55dDa6lJ&AWE|2$x}J(K$2?mVWZ?|hSNocecD9NQKJ6UWdkw=(1FX3I z;yW+~S}dSLE96y^9$nWB+GJkB`5Ba{BL5r@O)0urGtNIl5eJ^n{IhVEzQ;w{u-TR4 z9E3}g_S9H-TWZ%VKT>y%k>SzRdb5?LuT@u2QA zKn{{|e(1=S=LGFKP>*Fo;6u*e6@uCB|_ivcO_EpM>BJV=C)mrMQ@Bqx(s;tIrHw% zFoH;CGATTqfosz4-rxCCu&?+orhQ`2(Jr02nc}RV$#02?0&f{waFq!^TMRyY939Bb zNYLpChDpsm{uej_a|{TMBUg04VT7i+q2c?nc2l&od|tp9Wtq8#?d}}|lrY%>0S0D) z8+uyRqSbFeJ1P9TBBdNGXTY9_BiJq8ylZl!{xY5NoNtcVf|)VW5kK$+v0xHLQS@L? 
zJA}%3L|Qgr``s1Fu+2;r5x8>U{xMZT^nsVaCuO7AGwJlfnnZ$^3gn1aUbP<;GSnlr zWEBSR07&S>4**83M`&}P;GjrbY8!%1(JqaU+cpk3>OT(7|CLwhI5WIfgVGRem_8=r z(%?(`=+^jS(FQf9F5z|AhheCh8nw&EmG25ypHj@#)C=w5njG?)FDhGbiu_7 zSN@U43ykoXH;s3xI1?CC#KeT9=haj2ou8aJom4y=#)~l9(DI1I*MJedhoYyg_`bA2SiIV(JA}uIUU9`9 z1GxaEGB@~WH-*d1GwI6MXi`Zd<=$X_#b4Hse~bMKG8wKMW1%Wk0o!nnFVjn!DJ&_$ zM)LHZ^%UGkN3C0QkkTU7+IRL$RWSLx-2_Yp=4T^2B_^b%QQK>_DPwHJLz75{!GR`w1U9h~0 zlD;4R?`Al$^uMa-mXu$ZL?NTs**9AheCgM^4rM^#X4$<_nzCaZN1ZAEARBbiR^*F1 zwie;SoB%mr4lLU;cg49i-Dy_##N+mtf9@>2j5nY6b1JsLN)hVs@A^V^v&r`R4K^07}kGMtOB*-MVI@wE%6NH*y)Sw|6HrnM{D`xGR< z|4n6a?2*|WDNM|SkAt%H%{}skOL>Kr?;*K5)%+*pCEfpz#nNkn5yY|3Ma^OvDY{*% z?V(x5e}c(06Pf+BycI58$HlXMiO-#@@@Po%2&;sv&Eoc>Fcy~__4B@GmlLQIsD>ylPH@n;QecR;lPKz- zvS*%2cRPWvXfMJupzkTAw{II#AmvzXoI5CZpdQStn<`h8|1KY1H8bU;;W#lzAq_Qz zGD3qWOnIIe|BFg2>ND(L*E;DowG>PMqShyWIf*?njP4Q*m#o&fHhZEt=!XB?akSGO zS4Q~@C)L3!ytT)(QBNo$${Cx53zkn!&&1bK!HF{b6oJqVeEh~mU$Hg+X@R%M<(sOC zXTkRjSVuUlWbuc}TFmXIQ1ocx%qT~PHT_6LE6{y$@T9R2{yWDW-#1(oxan%-a#VXX z5kdp;q2}=V>ETLQN8v^ZoXZh6AEC2SE?5q=k$%u1v_58?9NgO{M9ZizLd&ebB%yaS z8=VKlcy9?gV;U{WWYU~j{jwY?FppUy941|Fy9w=$RXc{4g(^5taHBpv>r4}JzmWcd z6ttd3J^SsC!05WS^`~GB-S{ArPwI@in;{EKL}}V^Xz<1LfRN!{GqSo{#a8rf$bd>* zL(&Bce`Vj3689UD(l5|Jdt~3Wdf6$iM912ia|P>ajF(aX(Qbh|_B2f$Z)8 zFIB&K1?_k4dNs*}wc>-6@j;RM`>?aJxpx9_xYd2qVfL4gZjnvlw;})YS)7{fI5?;! 
z0GQxyM(}Eg@SPh&4cP{>?Y;0nt0gq0Z!xE^?O5ISm4i`VN~#djaE&e!&=>EHutPKgI%r5f324C#C9_C3tM;L2NR z3roGqRc@>*Leg|exo_@%u6c$meV8R+0SoK!kF4D@D_ARBbW`&vx{WyepmOik+%Zq6 zM$vkj|6mT1@s0*Obq!jw{x%Ha=oMxXV0ovCWLI%}tDGQ)-4@&2?wne#uD3K^=Aq^wabA-CE* zU+R$b&!UHv>f_3%>{{v?gOZ68-!BZV(_vMq%foE8P=v7V5sy{ElFj`^lzg2Tru@*A zo8(h6HF_axzM7DL?eCtKWwZsQA2R)>4OC6YFiRNDs-2Dv9DC!&b|tL*j-bm z(zfSV@HU5sx*Yb=cz&R@q~b4OOUCK_y%cP~L;RC5I!g~`$Ot8`t-bs#H?r(7yhC#d#WJz|NQMY?nWv3#X`%0AnakRo-fXpls@D_SU&EO7}WQL@i6RBJ*l`@^y+U!~1+2xO_ltwdBl`+Lrr7({r;+sMzC zLtEcAlCO}wu-Ciio49fjiDj({1&87RCR-n(_u3Gx8t<&f5tfST4d`%jc8i3pEtB<7NjR1nlkL|dcBH<&cv2fmv4NNJuAlMbYn%b`;3 zj3rHOYR(N$A3g9co>KiCl3Ofw_Cv11?+{B1Q43&X2X*cktmEr{w5%Jje1(L@+MIwm zz6uX~8~BwajL(=fc!flX?bTgs$Wiln8|YB*J|H6ruL|cIclg%QV<` z&+YH?sYJf5Eoi)}?eFuU)r3#>edM*R9PVD~W5(a#^kw`X!KTC7DQEx>yd^nQ&r5d; zLD(v(V}O)eTUr=q5m`z{5?BeD>E9PmQPy;WzRc4MwBT0rNx3hhmU8O#ct6=Jx?@;+ z{U(Rs|HA(t*bqnM@(-;br!|E0CDn8gROzIAyMuf?!Ms% zoZ36(FoYn@{kp#3ZO{?v16Y!)+F$)+O5m{Y?Cx3(4t{ibh1Jhw!# zfH}4V(ei$f6XG}|62sl6lI2>)6B^+SzAd7IBpH2RItEvveRAbLCx3`Vw>|cGiK>WQ zkX^v_0Pn3rmmcNjQ0uMA81*9aeTf1_2uf`^!w5HzNf}o+8oxX{n)>-%U2m}ZG000) zsjWX5k6h3H`CQV!jW8%XEd7bk^(=O%@I5pPFy7KADM9d?DawUf?O(+*W51L*BZ?Gg z4_|M68G}Mxmt$E8;K}NeL*=S8_i4bWsBk#)N7&!#ycGIh+!}WKCn49I{BA6ac^Ket z@ioZ&Z32OK^enRyVoSeqcno_{);xAq3o{|>tnwXGS3&KWL;O2#{ui~Vr<<(=Yb^bS z!wHC0ZOdE>N+qcG$==VBkL_atTUVCBxa=&geq&K%WXHy_^rFO=CjRQ@-%vpQd%8PI zI?Xss|4jH;;%t7uhE$yO$|h1QPAt>M@}8};d9L?K;#y9ts!`al({sxNW6By2#Pa{I z5$3d7lEMRS1+<^WPgqC1-D@D^VclGn=pYKuB0wss*J`52)=O&Jzb&f*k8>;;2G5Rr z=GG{(z1jTD-FF;vLH_U1{Y!2I$HDcjdcp!W*P>1@51)Wl#?qAtA~-Dpd#~JUK9G?M zE8HLWYVX^6+NCoNCYIEe4+)6NJ%`0!<7X^AtMK@>D$M)scAKlx-^ni{9~9Yt>K}CP zdFnIK$Gsd#Jmz28_V_tJwgjtH38>L*aF1>(PA?e0EqEL3sU^J*g~C#ctzQ3Wv$x-w5} z)Fr5E0N!Uz$-COQsYT211hTww!1`&|zlxSU_x}wE)9`6ARU8xbzV8=?Vq~R>n`F)K zzyQY@@@I`Yd0N)`kN``{D0-K9;%I?|*YkwMN7}g|weiJmr8TS*vYVs99G_sr7M5n6 z3B_%NqbG=23Px^CMSH*gjj|$S>aTbDw|#&Y5}&-V0odUZACYPr(AwWekHJCRbp%8N ztY0CyA)GDU({-Mzr%wiH%WB9Og*goXUbPm|XFdS=HLNSAjv?utOFd7?rp<-_1}YvK 
z@m^_hp3r5wROiqf_O>=8uhr%0M()teqoAkPaw3~m?x`#uz}zfVsR_WP-yz5kD~d+I z>P!4z2OukS1nL$SJ|%jr6}w>iX5%Aa16yfIsLRQ+wO2LLsNCD)?wKqK)!l*Dxq_a%9t$XXB%DfSN~>oyty*Q;CFWm(B3@CpkomqVk99_E~lj zchV0#CH}N7(gZdaj>)z21*!Ns&LtAw6LiG0{O)eOJLT`t&@;vV`l@02c^(QbV=L3` z>ggOrn}<@xuOEc(!rKR31WIh}sTL|?{E~fy-!a5s^jx!%bYa2unFy5;q~taO>su?Fqhr{QM=+u->r08cHQn z(F2b9`$SEPC_Ww10Q<0VPv zxuxZ0<>lTb3|?#x{XGN115BPX?!Nx@7;9VdrE{>(%*T`M<_4UgoAAibyKLKoEp+n< z0UZM?QE!TC7*?Q7Y;?TSv9e^^QXCo(5Xrlh7c(_%9wappba!iwFpBjrDd3dZr^mKG zuV*+`fU`m=9fHrf&&7YjBL#QwU}``GSE4R!q~ZU%DOcwE1~P5b|0^@)MxYrDvVzPb zvYFei?0RaS)c4Yjydm6kh`qmm*>%Fs%eEt86`>$`r)_fg>lLzPd4=M!#b0vVXy#14 zL&>T!)LduDpLvJ8Q8J6b^!j#$uJ2>pN>{Rp0?y6v_71XK$;Mjkqlx#Od8tOvt1nj=tpPIUG@JkuHWxCRmlWm@DMw*Prdm`1%?5m!H zt&|!RmuGcpr?&XtIneUmhKva8uWoT%@x%5|K7A;Mo zS_MpJ(pFLkmDt71$eX?T*l|RxC8jL00=?g6PbwfBh^^LG@fRl+ABKO6&@jz5BE^ak z|N7{;EB`yZIz|HiR9Mx?(LPYFX}r%(^zmRj@VDFgLkJtN_0rtCvz0~wK4Fm-x#hLU zx5EwC4Fru@n?pndRnw#itO%SZeA4{LiG&htP?|D147*VxYRO-|V(*%ZX!F`^%(9Az zq>4Dk6D5Cl;J+kSkSO|JsQarjHC_py4Yt{A5ve>>RR^8{JsFGMAEe6EZ6Ydwy_G8% zQc0Qf{U8O}bZbNzT|^K*hfwRX$@{TbE$>4; zLGQc~J`DOcZoa5d)whimW5)L8pv9PYW#hqM0|L>l2&!?P(go9Nc99A<<%-4+#=)3A ze~$o%fY5-!1B;_LA>*Fz)(~NWP)(@VEvE#nnN1RvGZRJld>9#wDaRuSrG%IF7qbG7 z42;W7Hz2TJ)p^3!-XhyN+srFx*5L@aM5!N*1*g1vvm3slMg^(TqH)(eOZav55`HwRkp#Y-f7eN#vvBag&azr1(pt!dD@EiC5gp5feEOh|2Bo$cQNn z+1S)ru2QEW3D6uxry03l5P&J^T0pMy>OUbG>Lss{{m35>6|Cvd@RhCQmiQ473Lho0 zqms+}Tttg>b2kf7^GwSKkE1^v%f{;V$_VeU$^KJR0L0O-R`y6-1$+0l8VSk%6LgR$ z7^ZJ6ho1zV5Q|7eD)f5E9z7-)s@Gj`!WnPnh37U)D_rCW~w||36EId>h^F1>dDrAATe4Oc6t`C_xU*NTLd zMwG2?wW#MRpFN6weH~Tm_fm&6()|Lqeh_r9GX8xGN4$E%f)G2*j`}@7``#+bfYdXub-1q1CJooaP^VmM*hmamF!^W!_A>jy9#66%O z98{u`H+R~A9YC}LY`_)F5MCFTg3crCa}CTEI5ZaKn4Ou0^bTHupaz+`Eo;r172)`6 zTJ{RM6#~4wcODmZRQ01Q?M0%KWa720&^t4z#+$dhbX5`FMlG*TnxW6i3Z6^JRFQ$; z>K4D0MP@HQCtv`yV#%DE#Z?HTF1EWqcqyloUPxmS@OEP85Q}FLU=iB8MvMAOy&KsQ zRuz^#a(~GXEJE$ev_aAclMqfAz)+}gC-7Dr_(36>)$nNJV+;J4z>d9E(6ai;?&hbMKT*!RZU`Y1zBJD94eXHEI$PcMR7?i)vWX(3a 
z+HRN>?5o#^;nm=!_qqxFl6rjHaU;4wZNcXG;eQ+CL747u@h-_cDVE)_z+)P{k@tiz z8NK>lIkbG!1AWiY52k~`Zh~b5Cu3BgQ>z^*cHWx6_80}N%hw<=W%M)`GGf>Yj8#Ox zX=1lr5P(!3F=(sRU^tHkpjC>a3nQ#+{l?#eMaRhhHYBiJ*inf^j>o)`{{u+#j8LBV z)@UuD(Y|oj=n7buM-hmN0xu937EEc>`K<1qsgJxi#GH_GX>G0TRRr1!`nRk!v1VuT4wE+Jxe?L>UCHra@?}h zb++?PferWsjsIVe@DBe9p2Q`qG5&>i|M3suJ*8qiOhM9(Ys*- z;vrsrIRpOX|LMPOeE}cD<4TI(QU-{EsMCHBp6qKu05tVx7KFJo*(jkdv)%=5UV-32 z>UOw0PAS|Jds(6Xcy$B4MCGo;5B-v1je$d(F$}NY4W^Xr-VCGlK!jHTWLo9v37QzX=xUZ{e*&7QF@2^FU7oGJIf^SC zx*Y%)0}mlK5bf+g%RaXJYU}44d!R#1( z?@UJ5X!c(qT${7qa77UJC08-zIx*-OFKsDsRK5)o3(Q-cp^X&nw^)6YBfBl>40(-Uzm7qQne6I5PRobTI+_udvAjy!eg5Z zn6HvS@uHkDc`8YpV%qB6uB3{N4r}L)(3mpD1p+WL+IF_6_I2uZI!0qSBVu}f!gh|owtiF<*l@GSHhudeWufrolKR-nr>kMl^l zQlqCj9N*Ss0}|@j24U^5KAVR5|+e=iCFcx%brbI;mTL*V`l zbU6MXTmmFN3yQ+Txq={GysAFMIXY79i59Y(0TZXw z_P}O;ZJa2Z5ZG1?fs_&*e8Ik*8Xi$v7^QXjQoz!#xV!~BR_@y~mhIYla&sU|oI8U6 zpLMluEvy>+7?lW)v?8AF`#1e)r_c7N`*jroqw1?wkM8}IgMX?4fvh#H*H=1j;0uBr zWT#W=08ihhsh#{wjux`sbIBEM`iQL%kFh!XF*t3PdBa~3e5QT&rTCKyFhy1%jyxWM zA>WoV=M?%h?O9jjlrxzBDtmsE2QV+_DUdLE2p}!g{fIvpjI{o^xrR0l0fS?}KH zhN>=kNX&27f%Im*<_LSlSWQq}OT9z$U);|G`XEy3=BUGdXrlU{Om-SbQXazp3lze2 z^vct&6a`*H#G}>l>-f=5P%mJ~9hPM|8lVWkBR%#4y4f+oBFpoHVu{Q~T)#Pa20I{v5HoKc zK_G+1Jea_JR0=_X;Q*ysbdI|LbaO;TxKQ%vFvAMcAsU+jKnK4d8}_Y*9ubVoQHz?! 
zoS}3$Psor$W?z>~=@LdnMo_h04;*AMWfO_ZZVZF^rl4W%+_Ww*z&_HUfog27mIXhA zq6N)T1}o+D2+PvQ5XCkH$7N#29`%ev+$200(3$h+w<{ z=*onmlZ*RG1G3e3gN-3n5keh8OXsY!ZN^S-s(Rww9}|MDG*Fj`F&{>%{_Jd&5`y^K zfV9CoWUELuRIFwC%_4BimAQ5vdH0Zm2!12iEs#=F!#-FKaO1zAnK&1cU>dY~Om&^5 zCp%NuSD{iIka1iZG%D`-GHJ9dFn8o#hyq8j=5iYdSBxw?I5%CBXP@D?06Y)@8d}Wv zw9m8-9)_Ln^P~q}im(`}p=OvqP3ZJ8AX+}*^_Ju;mAO0$(DoBRLccS|a~dF|wK@B2 zlG6aQf`BXnE2%sZmA^K!EM*tkw*@R@?7I&q%#t#?Nax8L%>2lu*#iUV*sv%Bg*QzA z4uzY90fr%$0*2vm#ARd>!R^MFZc!DFBC}z%LW3l@B2{bJNToF3)$vVR9d~7XY&?Id zqO3d@oKJ9r!6rx|7JESO6G-7$F$bzWl4}F3R`Zrmua_7Gi_S2{+ zGyn}w=^B<-r=Pbz4<3OY8E9Yp|L$9WzeJ|HQu%`FK-Z82*I_Ia1O%=r%X-5Y(epuP znN>nlCVas-I`D?UK-N%0;Z8`UxcrcI{ra~@@L9$*EgJ>@QjJc+xE!|%13z&=>^T%)VGv$fjg+~grH5#4N}Gk zjVPU7qMc%{dhay*YC*Udq_S1NGUBpJkuSbY@KYaDU4PWqu&$C1GOONg2MmLdUV7i9 zGv$oBRS&YSw^>6JC5LCS0wl}iVmF(psqeZ#3>=L`tMEjJo4#kfS)EgvD}iBW z)bfbk(iFjL4~dp~&eQzT*_1*9MYJ1n+_aO}7g(C@6N(^xKLB2Vcs!?% zY+b(}FAZ{I4)VGinJlb1m_-h17uB+p)-314z5|->wJU)`hOCCtsT4of6UajVQJRN< zn>>|xM3^~Ll(a?A_KTGJ+{o2yR!dvllpL=f$W=c_W_wpV1LdF8DQ^i=x=Xylvj)@K zg9v)VD>u5Y!4>gkmY_30!0F9C2s2%{C8qKbX{Ua7#c z&Vq)g?u>vS1Ptf-mM&%f7+n+~e34lSGQ+Q(1IvhkPJN^?GDDR6cc6p}itLZ|EC+5D z4LT5MV20v4S8xb}PP?rJF0}zCr_ilsKGSZT<9GMs4>}fdnt5J@(QlK)sx7ioJ1N3fu3EMUs#A|sB6za&# zcL+6JM$?WsU*%DVziViTirkN=szh9;B@HFV{K6g8MM%ttygpXF%?+w3h?NKo?$_r8GoC zm~7~6#Ldj+oL}z%+A5&{-7y_{oL3%r=;6z0@qa}-E81<(@L84po$HcqC$DZ}%De7C zePuF`6Kb!DR&bFw#9rWoq+?K;^v0S9H|9SyO`hM48@$&|do~CJtd7$<2^5*mU|yY? 
z9g_q>Xe0>SO{H#3NAL(~Gb5r$DyLB~*y6SdU9aIycS81F8tt7SQf*Bbo#s$<`!Cg} zqk8ZD2e;y~GtHpz-!VbP`WLoKDWVXZ6yhqv#!s()w0b^c;H)B##Cp1(65`7MgZ?`N z5ww|oLaz?!5%Ufl-(()3v`KU!4=L!-8kXrA)!~@Nw9)2euREd4v@&c%S2IT1KBa7D zE-V-lbGhuTFa9kUN;I>pe{~RK4wqqhjQRDk#wjF5+A+mXVl$(Z{;~rja((1_zAU&Z zU;xeTH9E6dTy2rf{s&HwTw7aeC5a6s5uPBD9udkLi-aKQ<5w(@ zHP;<|9C}h?pF7FQza!M}zpPRw|;FRRfApYlM{V7@g27ZN@joA#{|2d1( zTzErM_@d9LCz!xL^Yc;&`xMTGS|Vo7jGQe@+9aO;b@z2Sbp-Do86dsr0%!vKd*FIT zU$6bP7$qYJ$|BsB#f3y0n-h(5Q{cEn*a{M|2Y6i^tjdhyT z2`x1|^M4D*4c1|rLP1M45>ZbB)4-Sx^oYhBYs3~bHiHos2n2JSM#ca@*|-s70xa1j z_J6wHGfZ><+&4m)rGLFi@Uqg(CxR;2<4{M~D_be>#O6UF8WSAtjyby9W;@BZg4v%y zPm3NGG|^GoA$K@#gI|xVG zuB~z`*9){smMd1Al_`%tv@_qptS<9*(t0LS)g|BwVnB4a()oCeQJBJ9T zRF8F2WhF^B=e~PmxpJ=ZZexY545NDWaipPLD;E5j%gJixgW&w-?4m-M6qMharC=wf zg~$lZL0WULZ#|s(NynQnBvlw(xzHMszYuC~WYA9nC!II{0v2R*Tr9nSfba|B^b_V% zRU+zSSQTzI2Jf1J1lT-kAc-b8k{t}%g2degc2JFmKpYM=OC!Ma8<&ZGc^O-n-ED+HRa>}Yqsl^s$B?m$%WCu+!?c6DyWM1&UT z3|M5UPXi816Eqlce%+`vN2fr^b`_7xTr|pck?AyBWIz@8Aec$jb>8QI z(>sFN<=tEX?ZJ@i-%@`%EasTn4g#faF0R0}v8HYNSO~7^_|&>DbKF89RN1n)%`={w zHPn?s9LGeo3%Ox^tdJFHk%jo26mB2|zUBpp)uW3XLY=1I73FT49ZJH9swE1xQ5FRg ztI`#4P(K+gkh~JQM!#WUrsqHz;tSAJdv`;`6H|fL&!y_5c{fz9QBPa|_(q2Dny!=_ z7}T$&o*&{8*JXfc6*Ju>xtHPY+z7^&=U^xfwT+@av~I9WZhANqkV16McK34XvP zi=MhO`iq!n`g(e*%PEO4M;$=@NP&T}kA#90{rPmQt;+~;iH#l@&SFeC1YTn1riQP# z7}i%#d2vJoG{8U$#9H2c!>8a0y>uML__iV&b{8lJn-2#EiKAU^R)0D~iJso28<(Ip zx6}$Ce#)>GN@$6u7sl!xrM1eBxHg8Xr6I5mM#MmKrq$V=ch}AASk4_wM*YX_+HCw* zuUq_nPQaE^5TTf4SCDkxHrcl@ zf+n#>q~pu0RFtUhT@YTtMTe3(;zY?|m)&~YJO9eG@02X{9;{5| z?z-~$}07Qy_hi4ec{1h z6lb0r;{Bq%;)(uLx|$z%55Ip#vm-FPqNhh-PjsWn0TM@rJn8~$4c@k*0=${y(Iy4j zYm`hnwLXcMJ|hcwr{AfKep`FT5P06TQKXTqN2nXWAB3&y3gTc9$x|qe`+!i;Rjq?S*U4L zH9EBE<(yzsaFdVHq^~m90ND}Tui0r;+r~ss8+59P zs;4DkD3}rZ1TCq(3d7^F{f5_FZKH}KUj9@k``*%4x1Y%fGEo%~-)kD55`>3lRksFw zJ_phJ3dJZ^nZW>#^xZ)(N?hFxbW;(_cUDlPO}A9Dw1JKGTZw` zN@gN^xudiD^}f2w&ZJwFg;>0pR3Q9DvEDmsmdm0O zr!ZEIihiYO?EAD|rmZ7~WKjW(nyLnf?6NY!QN4Vm_e(Vu%ve-ffacH=Yl`5BuJDaK 
z?u6fA!A1fF0`r*|q9fOdKD23f9Ksv}r+IJ9jou@|j(8YUYh1s%DMS;DpOLx+&}OPj zEB2_&p_o;G{g8%93t#_6tDy1-m9!dA65ZO=ViO$lmU$wjWX^Si{TR-h2ZifkcS~K; z&2~@JYB#LS(O%F9sM=PotDtIhYt}gNbV*cg4_EVMPOX#GQydyr5#SybEM9$elGGJB zb&b=|6lCcP*=gtQw&9GxmOPmCb}X>&sR!%;cY_j}+3hi?@M}yz!q`z6?gZ_%$cU=O z_BL$kg7cg$yl=6W;HW_pygDf?+bQ#ETpk2B)k>Y?XiwJeC*k_;-D4u5maf^om6)LH ziA))m&Bk5IQqh|J2oWkvbg(QLANQ$4dW{sF7^^k4<0IUki0!atDI|4Wg|Na*io;7iJq7;+7ntRx~Pm&3kKp-7);d6D@B(M zKZaeDb3wq@4(Ymr!KZv4hob8uMzgw_teDww8PF0Zf$K=5)pd80bra~~gdxx{)9d?{ z-sT-IDvkFUrO!2A=ga`heZTXo;uL*W8AI^{CLEJN%j-;J%)J`58Y% zE{tRQB{yvY9ETGztW(Nd_NwsnBVd@pi|$qysXSHWtyh;ONXud;|wT1$38Bk~o3!QLfmBQN5qd zuNzXl)QwlmB)ifh*jEt|r?eV!hIgZKUojP^VtAv)LiVRrqb^nV9|)uB_L;h+x*LZM z#q=)Mm_CmyA+sH-4g5{r1!4p``{~N)HpzM||8_OEmgFG!s%@0=Zm7(T--8*fbqg78 z?x5PT#0W4$43ETCC0C7;eMI%+xz(Q^vjcL$U`eID{9t-@>)YStX2;>2cMmDoOZxiC z2*iq;Ky~1iQ{Is)Lfv6KC%e+`H5b#D!1ZlqT3@toEB|f9pNS#Z`}G7f07P;)Z96;ai!Q5qpe$JR>)Qbn&uvzI zpgO#66l8p}s^#E6Yt^?M?Vedc%6<$7c~56*3uM|KL!Rdw87imgp%q~_YQ{CXtFUPK*kl)4z!z%99K*)z?YP{Y%~YRo#szRGD;e8I1WKDKBaoiT zU2IvuMWb<_d-r@0cFazK0fz11ZX@6J^y{p4GseBRt7w!>kJzWz72aWYsOhsw(ko7Id z;EXxi(Vms5`|(VOmHJ$$otO_N{!h-esL8DE+u@J@K)YGP5EG24YazKM_0A7CId7Q?5xIdGr8Z= z%^MN$AG2Fo=ssxA1}_KL{*$N~9lg`HM-}wi>uJlfquhfGFyu4KsF&H{>^M_Odxes@ zkO{T&>>Z6K;O)r{8`ey^PMVy7F3D*Y=DQ zPgeoHEoAWZS(zR0Q^94#2MenM36GaWDl5Q62~HG0!|^bNL3Nesa-jy$#>q47_$R^9 zs<|Ke&>~WoxRa>}R3IXXrss5!eyHg*d8 z;{Tyw*hv75RHwPL-UQ%3{E=d4x3S|b%~D#96=iM%$1g=+W|m00!LJsa_;W#)7}XXe zPXS}H97{}e@Jl?nbn7m2aj0`u2`R(W=|i`@({1g?340jy1(cSx;`gKgp#{zhl4pCs zr3jTr6xinq0$A1FV1T?b-gi_Wb0*!?HtU#`{OxVsc18{x0FeemyH~hr-P!Nqvepz2 zO%#;FXLwwN#5n6!1ywJE*49n$I5k9QIETpX5-gGF+{dZYh22E5jwTDAew&~!b*OAm zOG8S~;R}>}Or(j}q41Gu@-q)yKJI4{`R5%5~v0j%EJ{s zrFE_gxRB7jIz>au9SfbrIu9Q};b+8r)-A_cZT^H+L5Q^|GUgN}R72(o_K;Ch!xfS|G=5H|{atb?JPMp!-Hn zw-v7ux^#xc?5+HIv`(YQFwa~)81eXe*!hB8BSc3~ubrixHiT1XMg=EWAY5}~Or~=x zb?JdJIm^qD`NA|dnf}X2b%|6Y)7z4|_&FXb#WAOV?~ob|_^1n2${RG-YS34G0A@>9 zsA$(gdSyPGAY9nmd?3 z^}k@`5;c<59T`|qtOmhZE<7;}N;T^X>KW0J0t`X-jy&k(v{qCW6?6g@vMfocx2t${ 
z`oVM!1R{4y#TSrhcJ+Nv)0bt9mPhzgOgGGGmh&5P%iTtNuKb~*&O>b3Xw&yZ#$5?p z=du~u-W={5U5rg!aQ_9F+}Ej#i?j~EE2~3oB~!3(VKeQTBK3ZynJabia(>E#^EsxI z3BcWTB{1(?lPu>>q)L@)-;%6gw`nfJStXj-27EsXkd3h{X{)Q1zPfT7%sg||w~zIq zbp~B!Om?Kc%cMn3-itd7caHc~IF3PIpl^UHskP zkpMH!)V7dae$%~KHrF8KuDU#Fds}@_#iwiIqbohpz6CanYdP08Sf&fJ^q(PwP|$F8 z*G#-GP}!JsW8=scvra*={df9OqR;$56yKWm6wgXxYm#< z3F?DthD{NXenz=^u8j%a-xBV=l7syKsT6N zrx|&VT5sHx^&0A)iWM}yn7}2ZV$JtQ1efwZ0_#5_O+LAnVAHR z3q{`v>gS=%DXwy3iKK>$Ec_zZUrDq9AuuA0 z0enV3{&_|Vl|44A>Zf#3zXGs%Jnbnf=goyGYbDTUkPajUM?`ktrpwPwYhBuz_NJJ_ zLTceuWIsAP# znUVf@bPFQnmeDPj&-c%tf_Ue>S+?UCh{-+J?)%ouyd$La<0{f-vTG#fm1*t{i5Bwn zySt85o?70EmdICa?}+QR307XeQot?Pz6?{_`$&8Yy7Kfb(96%^FLi&%NTz6mfA$Lp zeo)T1yTL<^~Ngu^G5l18x)`J!X8}z zHR2^Ml~nSKy^S3a#t3pho9qY4bo?E5aQUEqp%{@W!xmn@tEqa9rfsS9?#zN8)AmVT z)Jsv}WPzhKz(R>^ZYWqYViEp>dIc-saPm`%z0K=6Z&Af-KMhCkkkWUrjOf*`ZDw0 z6{1njh!7bcAm7sP0o}8|wom?`|E%>_(|;_il?#6ZyQBX=pOcV_}LmoGg>=fme~;==`+!8U{3+}b4w6m z)xgs1eo+%i#@Vl@+YL6A7rb5eG|LNKRC`P$8CKel6=7*xMuEF|&n(C4W(jhhyzg%7 zjYs~iD@#8(F%VDR9SyuVA$58S9raeq@Art?Ib^$i)zLjils%Pft>2HNp(st48@#vK z9nRlwOn-Fgi8p;Kwh7#3L3>ZxT}K~X6Xcc^4}N#{ySFz7_RI|lHAqQ1_US4}&y_rk zRT`~U3wc_95PEda4;i-~d;iw^*-PT2kJJ)pNn%QpKl!HobwIihF_|;_U3qjM&FYZx zFo`vP;=6kmRG{6k&!Z2aBZRb!bck;&`k_0VLRF(>4>Sj+P_zY}zQ^@`FgQitqFL5$ zajr3o>ob`K_BWaGq541&yM_T_+x;Ba?}4;iFMX+Mzm>KAZG~;MXT>8Z9LK?ajfQ?S zlb(Cju?O52Npi9=L{jj?1;NFm{An_tfs+d0m|vAx@Ow?^?VMObnE)`Og-eOt6j4YmO zJ@7AHq6hbv*ZnZZ-P2gz`-T74SbeMr%MK;N0)iD)Mh}@nDg=`=waC%ZpQ2__ zXS=@Q{wm8Gcg_CWBZ}z7h!}SW+e_P9Gf;5*i{?fac4_`9ta;DM?$bsm&oqfw9;@`} zi9vM#UeBT-}ty;pJ#7!Pyx(X}uMGGi7KW%-AGj63D z6gx4_dAd&AQpYPl2S@FFN{nKf|Cd_^c3X98pFE*_IK6j8AyDb$VaLL3)%=}lRLd-D*+H`wz2wEzfGPB5qWugBq(dnYg=&2 zKa+~ZhMnIRM}5Bp|0(63jo=KP*g5ltvaP1hKtVgLq4jm=*NAng;uSx6C95z48Ht&# ziwoO5QgBtd=O{o#S%aTz=Nhj5P|C)>z`e}UCF~BlN4JvjX6)UY%F|9B_<4JFII%hw z&yP=KSRN?M3nf?gb7<#o`r1)Ll@IT4RzIDg6BG>-faU4X4&jt>7YNO-;uJjO)xF&W zP@%@t|Kzoy7AXoBDeKaEa?qIy9{YYWuHdfW+K+>)dHB>HKEml;N}r;SbECJ0rT1~6 
zJ8mBp@ctGZZN%T1`k{}jEX3Eng~Ji~Jo4Q9laB}d57Kp8nFi{XC1+W5LirCeYEOf3 z!-KiKF7By(b$sIqbC*R5*!@Z#{PS{qjyCTYOIaKYg>_!T4Yt-f3QHNUw&&qlM}o;% zk%reqAkEVAbfsqHzRz6OeXe>1NUotkR5D&5rJ(h5X8+mVAUW@xZjlK+%joD!n*BML z{#)nw@k(x~{?X|j4vqq@;{w5!bC>m>8;pe!IfC8glpM)Rqf)jKT{shi+TJ_~#2^Fi&- zR#xPem8drx5`Q4>=lRthLz5`8rm)kG4|PSG|lmk>4X zJS%kp`Q84ksN`E+x!eIgX63GVvloHZk~0vk<>A@E4{Dp^ZQ5cyX*=5G>iKxUcSeHl zdq!Yy!bNky3XxxJ+1L{QBNui){!|&_(UkRMgX@)LjN}FD*pyw^-z+lr2uQ43E9Lj7 z(S(*wW+KYKj|0Cu?+*5LT2ShId%RS2tU-xmP0?img8>*s#l*xIyp*M#YHYHmocva| zKZi3KxBUiPV0|my+~K|51fV`44CNT|6OR599xy$*cNiZ(cj1SS%BZcp(VNylF&y!P zRRa}{Lk_P)W%j792VAa`@BSUob^*!P)V40Na`@zv>&L`i-)im_Am?qszy4M!Hx5#v z=6PqBERhVp1fRs^#CdM#K1kfJwYuZxWX@|J6!-Cua)e~jfd%6v_iI3|kCl7CmO;U2 z%TWZ4XM!YCL?Z_;oX?V?T9b#v6%=a3eup^-8Kz8M$56GoM<#Af5OMTv8z+-Atm!`t z{&AuDp5g0t#^+_pd?|C1^pYL zfX4&T5O@CQ__hwmGg-tm6C2m5)_xhw+2J682FWoI?zvW0iZRZte~n^!sXUg2WIl+vX34vcn4*ewG)nRk8`Wdb!a*DXfjCra9A^(9m zJ~|Ft?J@&b-sXN#zYYH6erPim4{=Z$>Sk=2#U^e)?9BQR!dLK@P3$Y>PkxorOYz@7 zwYt<_2uxomb?ONTfOdvZ+kMT;IbVFL`ShIwOFdB>*eMd0qP?mUJI{beLUZ0uTE8PN zWq_acT5CLfTbSQbG`ish~M-6?e?AIy|Lrmnv zaYFCCPjRiczmoRw3J zbloYVl+smc{SdBlSby--qo|&a@VKa~R9x5ZhbSTDqqVvF+pl%q$kZF0AOj%}t;B7r zezhq*HYhYdHE$sJAlBR0C>cwj4s*SMSwHGVZMgSY{v&l2-$$g1qN?Hpyo_JeSMI4) zzrr7i_B`FBrJT_A;O$4ZNL)#Bx_+L}$^6krN^#46gQcXo<95e=k9deSO<#yVl=b5e z@K9Muw1RM``?uxJ4{cPk33V1IXcd;cWB!yKC_NbjOUyY16HX3)esA0_)nsWQBqTF_ z_QGoCjtSaD&M1`hd-=wk?Y0yKrnP(Qwj3*VPvnfT$+nRp{UTO_``$JS)%RN6LAsf& z{OjW2j#AFLXXiZ3{$%xD6Z?p+9@wuG<|df?etDro5IVc-rf0%^&Bd<*z`A=JtXIl* zf3xU${Fo4$Zy@b;@r2K>tbpTNvY*L6Vkg52&Jul7&M_>Vw|~|Biy;BAQ#t6^yGqIB zU*5}*lIE#T-v$B-6SFv}b^TsA9m${t(H?SMw{;!;COLu_Gob2XM-KvQ(B{NYVG(ggRnU|3XWwl@biOl<+i(ySOy1&#*D3VMjJr2=A%AGYS!mnEqK_A_vX2t& zd-op>`{Q8>l+FAs^08{X%JEtW3cjc)a_DIV<`Gu`AW_$iF3Mp6OU{Am>D-4_ZY*-p zl{e*Y#JhicX&!dlwsiCG_iJg3j9&0Fs>k}1Cekz31Lca4uLQH=q&{3%+aegJaP8T~ zZk)6Lz&%lZh0dccOYds_`BN_>6gr}C^|9Dz%D#7`PUf&g16^~uScUEPI1-&n#h?sD z?LtgV!W7ZC!AT(43kMFeO}W14y+>lS_ukhiSvfs_%%kle&!U1YV}7x+Px3y^v_iM+ 
zv2fy-#Ex#?=BOms&v@^1gX#ruRVE88_lEWPOPuEHxk3t)l;rF~eJ~*&o;yo39W*&# zTNPXP+`1~49aCg|lk%x}>$gjGPoR<>hwR=!-F`0l01WLypFs(U`g=RpZn*QgUC5sj z$;Stt@DTrl9Ls2U^HBWBlRo=>QTUGN;(7S1Pg2R^x3nUOylf>e?3Y>rPMIHxB2U9cHqSdJ_>9P}KWow2zjK9>>ufJA0miwAn^`*bb;aDaAym8ILCf&6iO7Vk< z3He)=>#g}#wfbe2T3hMZOJ6aTLIRnunTbHJ_3H_>8n>C8xWTJ)X7Lu~dkzQF%*OD>$L6`bgoaSciA(-7+4sgib^b1%pN314 zuA_8jzQ`}F@cx%=}WIvQ!eJ26(GM~3{HuV>203l&#CCP zcrJ+?dFO+zcqM%LX(LNa;nxOi)8=_sb-dE-Ll8G%IV9`ezMYZ6+e!+{`+-)$&)XXJ z(w~R6%Y1rPTu0#Z;Z`1hfMl<2X)1?Ve)V=w-LmuQsP{jT)^tL_(|KB_&7A`HHH{~On+&WysFz>LYPZ)gl)-w#^IOrwjP6tNol=;#c^7!5QE+DZBOQ>FbiEg6sSw^&o7=SEiN}-HP+L55v|MiUQcy zrK!YMxPUITg!~9T`zo14s~!FBTR_9KJLs{Bb{snuBg z-+e% zvG%KPyi(B+wxqV#$0c%_zBziSDsqs^~2i5@P!3;Raw`MtY=|_3BF2fI@rr1O!U#w#P#uTs2 z(E{_mg56~O-by^3L`(OT9?M8`G!ov7pbPltjqVcsYqzJUZuGJWDc+ZpcIZ8)MrY@%5~dm%^Wo#Hi}=ZRL02 z<6d3iJ0#%Rs`EBK+D1HS_kc>ThxueOY%BFyJue&+$_!^nc$LcvwLJOy-VEN}I%)FYc|-~vj9KxztNRlADD#40{2mL;t>}sU z6AMyp`p%V%D&@7=ksA%&K{QV{>A8Jk`Z6o*V2}7`QKG%?j{*G0WITC+#lM!FrJ=5I z12tv0;ZKY5S-kb^eU%+{VYi3w*GZZ1C8U-mi3~eGtlZ^84iTsf;d8mr-?mJy67-b` zE)T?yn__oN=)Cyg`eMrZX+zO4>67j4yN@)Zn9wAtKYAh}ukl|Nf1gPdo={17=Bwml zvRqvM_?Ho2j+8oLvR+m{78h5LGcooapMB$JUp(?h$@zxvM5<5T!S^SgW$f%!DLxy& z`0UExv48FKvxd0E=fjAvoehC0zl0`p-s1o;A@VdVb~1lfcJj-FCUs{AXABWhx^XK` zJk6>EU)%W0ZZc=GxRQ(6&A9`$g6O49EJZvV144(h>JQ92CT{cn10}vjRGuUqu40sd zUvvTkuRnI*sZ`(d=J+#zGP*VH;_c6ohFP8~9%J4@ug_Y?9<^V&1%2i2uh<;N^b1H~@YC+d-6MtBeX*M)>f@_L>8eITsH0=_q~r z2Wslk?Un2FRF&5}&v$2n_~t4KJb+0{FLgLS*R`SO`i+JnHW>d^;}25~96kI^BsI#= zgCRe?>6qM{5mvkakYHaD8o;|-t_PGlNu5-ZspB+qyVIR8=9e;g5GPhG@0(t#B za&yE3+9g%bmSvB>LOg1cQ`!x}W^Dhcz{$^Yhf-Sr6V464l?8E1ya?w)Or+#OGV;*NLahD~s%r5|XRtpA6q9 zN0QKEm&38FmUe>&F$SMhKlX#$HD&IhaseTPj*|c;KVR>T8%5k^;h!^uMM+y(rOD-l z7D1auL&ZAboX@^z&Dk1z_9{QnsA9EWs~f^;;JS{Wy7!SpyP_4tYc(04k0k}-KgTg1 z;h!AsySaB*73NLFY$S(H=BVG^J-3fvUqL%WDk8aXVlgHg`$un(7g_f`ldgU9AHa6z zUggCb4Q9a)0>z3U@Sb5(+NnivWJk&;TrqA{?Y=X@9k^31dJ+PS+UZ{W<$cD=Vc+ek zbx4*zYH?6I5!_2!Y&|wIpAjx%C}tAGO1%Vq8lpGFu%TOOm^|~&Yt2zMSJ>2ku=t^W 
z#P1w`mM~amcLGd~?13w@ql$qsWQgA6puSV%In@JI@h7`iH1~{NHa@nMrEa}$eG-JE zMyeAdT4$qi|5vI#v=5wkzU^-f#K-}y zpT&ROcKKNHfO6KB9+=#~x&O!1TSrCJz3;X%InC zI#d+tMmmQcQ0W*#7?goQVul#tJv^W9Z>@JN{(uDw&N}RU@B51T+H89nAei9cFDCvM z*3%$;@#c+YG<9*$=G$nO>5%4IxexW!35yCJ#=LAP=HB($>U8h;R^r?|-RZ4B_8edp zeY1>*WqZ=65TiMTY2Xz$l%hE6#5wPBE%6MLur!aLy!^~Luxc0tj5@kGx4n}EZXqIi;mL=>e^{Mh>ttS%*ce^iPtipZ&+GvsgM~mnS97HzyaBZmlpg%Cjm7A2?&KW0d z*6XF97Xic;%X~Dch4FIB?bpo&zshLf%Mmc(=#2nbfs@Jsj6?QJ!b+*f|7azRXqw?A zjuk4!mi`6x^AuH1uTD7+3gTo0ig{^6rn{JX@p^iHGlh)y6VR(1OPYvCz54J-W7}cv z%^PgR8{ZO*=n6T?y|hVBEP>7DUMOjj;HPD~Qx4%cKl8Qsm($Wy-r|)1ZTxYzfka8$ zWI%(KySmx*SgzF)0TZ7s~8YTtqiVYmKn>_r%_v z3oOwL9}kMGGS=~5MS8;$LlwTL?bGX&+x}}+Q|B)pn-Nz(e93KEPiCq3omAuo{!OoaZi`cy<+1kioV|r^)cjX_+s$ae=C=>2gk|LQ;FOmSl zix76VwR*x}qtS)=(zSGBnU>P;d}hnrG=f?lt|M1S7wqK=le{vHZdHH9cWG=5ax*Rw zwC%R5-vsqTZS9%;WbQX>u*FXKI&0gztrgOGyEtf?RGt`{G)(^d-RZvC>@yN9qjh1D z@cJs8Gj02AJO6$QhkCg8Ub5pKgPLKW`sQQ^l*lU$X}e|cQyyiDpULpruxDp@(~bLI ze4e4t)ZHE{_ii5=`nz@Viv}qRN6c~P3tNJ#551@kk%gst36?#?WX|UDv^!_J_)JQd zN-WxX_3ux{hXDzLQ=B6&8f&h9q+d=2N#1(Vuke-&w}=XRX`Y>se+Pb|6z2(?1&-Pd z7rLi&NkSv>Pl^2~N82EUZATriN?IGA7+No;43!jzAa;s|ThBba8m zx?dx^A%>lEG{3EYQyfbJoiMl@87Ud`P{L38+=om3B%{34{L`mQxka;`C=SV?w=vyi z`I{Bb*Q;M1a>|NK5Ag-2v2y})u21L#A|sQcaM+)NZ~E^yi$6DPSU?@We?>TAZp)rB zBK^S4$TS)YiLSv5$ui(L_GqMe_O{}03{b;Pkf&g6vC*BA=?^jQUgB^?xUnNbCICN`U{bU+Pmw!E}N;Bl&U%3JkVnLx{FC#iGtZOC8X?V1&}@b=Hi?Y2&6)XM%eK$`<864fbeRJ==}5~Upm@tsxrE!3Eh{Z=$B>Z0Y( zmoE+$nwE<3HY4aZlj%dUNfobSHe+t7fwx-M+6+4o@-mc=GdwTNg_*36dZDJ&{J)hC z$myGHYwMKHUTn>Roqz-rEhpypx%I{M{}0X^PJ}9t%(&ai2+%E;SmH;i4 z3?pFK=dKJdGURPv6XS)Go!GnAL1f#+_fif%rK0Km2?<1ICBdw&ZD{uwKd+1|vqq=L zY=|)w^XsAKq9zNGwP`GXYA~WQc|0lZRrmuj&+~{+NjF`i-IcUvMpN)r#R65uMEmoF z6e)PnY%uws7u`5A_mNNjgkTw+VJTj7^~iQg?RfQys}8oFvK|SR2ulC^XD{51oBWf8 z*2f_oNs*DC&I zcEKq{a|YKvcc%mrHbwfz%w&edJRo5J`Sd>PM5em37uPhbK#R-QkqHOo>bQ*-nA^5# z5{Zeksf68j$%_M?$9^10ehcY@|BH>Oak?&av@}!w!y#lVd)G5B?XTJ=N0zm2hKv&5 zT@};~#>@|UyIEV5=LY_kPNe2(K$~Pt+WP$N^aPsdfb#f?V+o@?FV8Z8W*wXh<9Tv# 
zsyl|O7&|OJ&avBsliVUtKjWABb!^!Su7AI)3GcEy8hC-BQawsxUjN9i%N;WhFUd_8 z-wb;bp{8{y-%}f4jK>yq_@8%XZP_y|KqYHBXUfvm9p#a~u8nWC{NUY_%-6{-s5@)S ze7srlC0{-t($UE98&y_Mv(m^^27_HdWOf^>`=|sK5lIgZvcj{vT=EoL4a2O$O4`Jh z=&t}v%2Sa8NTV3zPsCoB^017TCie1gaJC8yn`|fVA}m6n{#u2PnflQ!QD?;SjTZ#< z&0Ng~Ek9vt%~+Z)U(o-PzdcJ{n_!fL*IC2LIl&q^#Lf&x$8Mrb>0L2}<>8 z_AN-|Qd;9P=4D9b(q6<_tg<XSMTcKof2AQq&8-D#)Z|^LH{0I3* z`n7A}qINt*2X3+;Sy7r72f^_fLbo6{OfhWeZk7d@j+P2#dZ^Kkju;^nm72+*CEXOJJ!cok=YTdAAZ5+h4I0ASt67|XbYw~_-iFy7t0X@Rb zSwu4)^2!nF+T!`sC<-3Xc|HDATg6&?5wg`y#hqiifYNd2hk z!j@=+(e#gbN0!T_v@fks$sFS$L}ei<*=MinV1Ee>SKZ%rxs^V6?P*x@Il!>D{H~F* z;jI61IvEhe;NW(McsHqq87$TGhBC0nN}_0ui+$!6JMMzgh?0Fbd1Q06N;|=M0forw z20PQfj=I@8XDwj}Ok@pK@2>RO2F~T{pMK+h`Z}|iR2hz$$;XxCGcIRSQE@Dte6 zCmdD5-q#de%}(MQh3hUdO#3$#fZbmR9ndhW;3sPw@0am5VAcyLJ;@EDIKo%qz2~~l znmqS;ERS4@rOKw}!}RL8`-Jk)kVf*H7V_@0B;6nnM=}B9)4B)E_IeF_c|C;yXk~M= z+FXVtUQdWZcC1vNQdvy-ZLiCi4gY}Kkc*_Y>#a|TelUhdjeO+ox2ky< zmN-=dP-l|!w&bUF&eyvx?9c9M;TUMn=i;&JUAc`t@8O^cX6-=)Ebi#ezW-$iVH`Zc zzLKx2c!o1q($v@`E?E@^c?bdEJZwL81Qut=?!vEDX}uNGexdQl(bdqGJn87A$29cIp)cH;LV@h64q-(tmL&9+~f4LJnqH(JHa z)WF3zcWa<}3f(a};?6tkgMaRRT%advzj@h6jV68a;;*CJQ;ln~KPW)=cJ%FNCCK&J z(!izLR@n}#xuU9FnbDhL0mC2DAA9Q5r#iPnlf_u27nnuKkfhe{z8IvKMhEEBJXORK zeY&5qpD=;5qwgqm%qEXRsOeII%HPsMYNV=%TbWjB>^?m1%2!213Z!pl8>Bv5=J*7e zUZKF7)f}{YrkLGWx4#CL~XjbB?B|T84=W8U$MV&vI#r~(nohrZEHsZMA-pCn`r*(>t`oDb+@Nb_HOoE$N@~yw1RIkUeF%1T8H_W*P0GJAcb3H zAeQbJX~vWHupd{Ddh?0`aqM>gz>~hkd-|cK$<<3o?tN{%#Ym5IryzRWf4CbPyeTgv zY5|+yb;d?@iQPQC#cuBvi5%n(uhj*KBFT}Z1WG+HL+k#gXI1f77E0%2>iJg++S z^!axSmaPPDDleVoqkjLxL%i?XYRIJ3evUn(SE~u>>Zs+g7c*+&6Nn!yVJDO9lWH4E z_YFO>w9)c?gb2ZFvu z8Ca(-q70FS?lul$^@Cr7$@A=McvMmjyND$Er^4MCV@l_YrV+%MJ7EEoK@L*SmI$h7 z2IvDhlB@f@W7s544YLcT7Kck27QN~vEg=~eu)O%y@)}jogPLO4b`G?Ak?l<<7_m8S9OQf-Of!eVD{ZC4yGk!T$iO-O|?4ZjIw0 zl07HEE2ahQh5Wd`n8vc0zqh3|p~?42=kX_eL#Ws>I($<>A*ztN<+Od8xh**it;yyN zk;zZ~yS_MZ&7=LwvocKC`$ntR_C|>|LRVfYoP+6cq4+bbu=XgN^5KV{$wws|gtDHYfxF3Gh(m0dRc)?2$Bqx`BAF%Tjd#tQur2jmsI)%=H!35uX* 
zGmsRd=1zM7jOI#jU$}Ssg*)xr$CTo~Z2R=%#`?=C+o_M`gS8uRpUpCt)0%}%^#8qc z&h~SXuTyX(>eYl5-O{&QF&a)d%rZZq#DCI(F!J1~7-hBr=)tA19>q`K5}{nV)@DIz zMVj9|v7I#N+!*|_>A1#9|CPrVr7CRPE_3x>FmC0#8j>Q-lo?ZNfoD@!MtEUd2k`-y0*mIN5rnTTvU+$w$1pR`7iQpH_U){5|p7nH@FeR zj5&TW@4wfxD##O|eSBG+T<|gfZBR7b^k=g|*tPN+#NBp@ROHpbU6e-e_qxsZX{$xu zgYD24Fa7sVX`#l1H;@tDx1f6~N(Cqg^o-0SL;~Zk`#lvk%ueq10{6%#r*S zz%%11th30OOsf-%&M9Xw_j=p0J^lNC0`%^;6uThP+dwAb%rLO1gSTw&YtN?vhU_md zr42Vw_|Dz>{H15x2tka(pD}GxJF#65NBVEmZ|+<#TgA5(L6mk@cz&yYd1U^f^7&~H zH*mt&c7yi^Em$tvux~Y*l_{2)9cp6GYR`0>TR{4$x#5IrhQ7l}VNE@@4HGM>WnWYC z%xJA=5kB1DGnJE)dX)j2Mue8ZBsN%{HV~kFzaT>zGUK|h5jx!}@Z=}>%2I}g-e%2G z=lPEj<82kr&+1{jqdt(|>?utdzJx%!m9@t)U@{p6-XM_I^2N!lvU7dn?%|D78m!#! zQtIHMnCgl4!@7-dfjQ6@Ht|l?J{G`B9wa`fOm!Sm|97cLdtC*0ZFr(hxYtmfSxt?V z{4sR5M}lzZ$8YxjN!`pVXYX#EHOTW^O%1g@2q?Tzl9#>Y5r?l&INK$##iLCfk7S=# z%KF&Pzt?FFT@w-I_x;@t&|HoaMNw70M%}h2H0hYBmn0vY+2(PblqB3j{a#REjjL|S zB8BG+Z5_k?_4)Q?>s4*2-VNKtK|4M2j(lV$$FkVEOpCic1QS4*$lPUJ(4S-WKdYLL zC+DiZ$;kO-es+IOd~^$b?J8=Ew0plsf>v|SwH{N%ja?SZr zk}b&g`fws}#pcd|3bk*di0o#PBN;TY+$2dFxoH?CJNmefCpp7i_4)Os{J85u`FMNs z<)xj&G5lc=r!g(v(UHjDMn(N^yi203%1_0_gT>}D@Q>dWXf;1jx3B&a=@VBMXSbhk z{c3+5baaDZzxrSK44sMZX7rPwBCSpy46!In0fzVdX=b5?sCP8l4Yz1LDHl4q-Qb?g zmup9oU@X}G%mnCeS)}&z_jpZXFVW@+wOz^-zFM$EVM3i$*~Us*2VbXs{hguXF^GLc z4?Pz#5OGKN(#Tjg=|(q^E&2@J;@f7dzhvJUD6Ark)DG}%xPX#mp#SDeg>6COo!b4%vAw*|X_ zH0HQ#FY1RK#v&fl+-Lhf#RCAv2&yDGM2Zj*d#~S`bbn6P-NQlu{K?HH$cvc>a20bp zk!G|N_`wYAx#;E0j~D!z7VzFS)bl>(?*4EHH+3y_^IOnVGo~N*CbP@ewst(zJC3oIv!82_=46RZu-AjgbP7^H0)0}P_q#yvxJ7aU7M zeM%`MFA3{n(BgYdtIFE%)==+BF7{+NRdoDSS97K%MaaoxBMAvubP^z(*$_g8jCJ43_Llfaf0>NAi~w3 zJBu>zhv)?e7?%6N&gV6q_)CH|=p$nMlvVK=jwdfTwixz5B$Dy`ws^LjD>t1y@+j~& zb!=Txg0T^mhOi7?)JS_x2B`Fx2Ap6`zQ#7tROE`pM8* zE>La(FZW(Lno((oNq1}f%QF6>w_eCR{ZSaL^p|ft{C#}kH%+pvO4Vh^5Z__Sj8t75 z#Bu4HVBxlvU{Qd!$?q`gwTC9oTT;gqzFkmrP*HSHF<$+A8t$LfUmK8c_?xGiV;ZHt z12O0|%*J1OEahy?%LX3D%>ygsoTXT$sfrN*6qC#RZ3{!H4X4YF@DXi(2CRo<7!e)0 
z7JJ@>l;rs2RTaGpIinlwOH4oJqV)3u5l_u`BN*oSQ4cVb!t5A2(G{f<#JIJ{5hW_L4QSGV*b)4P%~ThFkkf8E}P+ z+au-QG<5|Tt}R3BoOGZ73!06-i2Q11=bAd33F^KF*XOlkx2p7mHGQNZ@TtGwT>GR! zN%!2+N0voKz)*y0hobBntask`k`BG9>KGO_VW31!Ugi0;{5Km-)W`G$&X)u+YBC0o zy}*?Y^?xCmFK6_sp4M97gD@z453b?}f14OEQ=fQwXZeWM&lebUiXbQ5|5!{H2VoUt zxx;X8`DgA}>JfV44{RYuxyCm@pQBHy2&%tI($pfv|NAh7z$uYtUq1O%Gm(Ceyt7O6 zrOl3DP`mQAHp}nRWaxB@5}rtwpGaMwlDeVe^Gx_7_^isq0Qz&v_{lc0^;UJH=c#zg zbFyzB(Erd89SLMu)r0!6lgPSw3s}f#6hDBxr%o7dM9~wZr&l|X`KOQK3_`CJX&FMA z8LzCGc-4k?OV7+LiZCKlDnhL8th&@>AKx4P2<7Z&lw-9+bL;Yd^MjGak&ksGCckEK z2nrN!YMAcsdAb5B-!Zpq;VmhRh)2(Mibvj)v|o~!0lqlaIiX&_!`%^{aKj#Rvfje3 zJuuSAh(Q;x+u*^gu05CI4$%}|kPb2Id!dJ8aUC396xEz8*EJj!i7xX@TNjc-W|@exAXZG?#>jm?LW>lFY{fFC1W;@-Z6u9 zMZg4b{=5+NbP84fhhUxaTlCt{oh8(m_bV}SH>9s_nF`bXpL`iHQgA<M5k7`F*baYI#|vZL54hqd=_7c+cc%EIC)h}Qg)%8+4A%aP)0hIN z!R`~Y;9K|P!%c11Id?6l9}Z$u7(3nT>6!&iV>brPl1V@neqYDp$bsnRhf-n#r;M;c zkTxrNQj3{&<(bZ<7fBY4o_ZLcl47XQ>c&d@;hcMq5O1-LdA*eX$RUNP^N|8jm&Xqm z_mYXua77*oXaT-YZLQZ6-YcR5t#yL)V_+o6NJJOx;#c>C!pPm_pZM50Im~Sqta$=D zpxL75c}##!s{kcP?>-D$EeXhgtwHbi`Hx=^P6pk0vT*JVMirLl&IR&nHiIye1xI72Y1W_TZ;_}10v)eXjB9E+B&rh5)%Do2H2L_W+##8!6>xCb_ zP>rezUMn=(O0iD#QsL!gB-7h70o3GOm)8{KYlQ{X{$Cc-ga$W zdMQ!6eF$w-kp?Y9b!RW9wRddg-ybOSsnVdKH?<Wv`4BdL?XJE6Z(lbS{IVQY(Buw@W%XVNzKkP@9JB4Mj(XRQ;{L)BD%b_kj4K_ZDe}y9BZC;RKd8v@Buhg3#5BJJWV4w;w!X$RVAAGG8dzk z_jJi;X7wR%ik-Tu{{&(#*j;to1gTkl zwP*W-Auj`@?KxZJ%QDP%#X$R2VAJH1#B{6rvd8I^cOGH-=Ep5Czq+CdEZz@>XgLGy zK!1|URA>D-0`d*<$O!DEJaCK!1;Tg?VwP2Vdgps_OWtX}E+!@w9v8Hi)otf8mrf|M zFi$*v@=;>x^bOQP2JED8qCD~g+*OOzMv`Qk<+}9O3%BZ0xXYh6#AH2c$yDa=b2V;t zQ#Kjq_KgYqW}|x{KmWKB5ZetX(E2V7jNjr?jnAoQHM_U5QmQL7e0y1v`HnC>3&d`{ zkdse=O(6>JG1*r#&opnc0*m>Z6_s>btF^o$SY?YK;c}){@iRf*UhQnH6Wxz%r{e(c zr!`AI!9rlg7AZ5e{5xgPTI{VpXjoFQhp>7$QaHBpuIb{3+6`TSfGl9Wa1!u1dgBM1 z)GC6D?gTK-N=N+3c3c7%1o%;mr#B1P>bwCm|F?bIy^6+eaW&66-GeQl(zLq62x3OL zO$-5|WRJg6H+|?_t$f-GvP>$qK?b$v>vs((kv^rl%wVMpVj3CP#6pWH0k)7gUAtac zxVcap)M${QK<%7Pceg!*nulRWtt#hw;j`%eFTs?}UPMj+G>c_kK6f_P5uY^Yle0(m 
z#q))g*66eceM$|P_AoL+q@_8bPC2*ur(YCWn%Qd2Vw3abO{J^Qi;21d-1B!qOP?U`as%yp$k#a|+-qjF z`2Ir8y4k&(S!Dm7bPvzhO6lc!;8*`u*po8Lt#AgHGLrVN7fe+_S40l}Lq281?dIE3 zeJF{QNlab$k>aCgRs?IqBza;u{p9n*AoEVy>69U4uQ+hwAr`ghrE;%#KYV^JeAW?Y zm1VxXl#buTRi4Zbb^gB&W9Ci6YEYC#UdV~@J3kmnb_>_vi#OnlyJf7)*XVYu?`FO| zag&BZed8ey*XK9ET~B4+P5t};dMVa9^UmrvITrWM5~AqDialo?^Bvk%#n(CS5})nh zMM511`3eilU|`Xxmy_uW&IF`WZ@s%^jBk?@HZM?$+B<6Zksz@euN)5G|Gd3ekZpHe zrP3`Ek^5xvgQ85dITn{uJ=klq^=eO(F`wNGTAuPI54SpoM>oe0wcM$p9m^w02> z-JJXa{Lw zPr*Yd{pDJS24;BM*N2{rXcbSi&;oZmGLNBS%FDMby0p^>2R>}O)IWvkRHmhBr4Skz zI$n78s8}$s#2(0CZok&V!#UDMcPdzp=W;Gw`x;Y_ShXS^2iAy%+no4T5irg6beTmJ zfst|Ze0>;iU34#qkG2(%7aDIh(G`v&{;;JLK}6eh#ix5cmgRm1_)7QjRI1Ig^GoF^^PJAQRjNqJ5~h25-7?h9u%*KdMY` zZz!YMbmB;U{ORonJXb+K(v8)b!BxG#H61oCISu6dRe z94WxRGCQbGlHpN~*`53DEB&SVHTD(~44e!S136f?;wjQTrJsgB+%00##E85MiWk*< zp~OIhM{&>dl(Sk4?&vKMA5B>@OzK12OYynwt)4|qB12km=w9SuXvQ9J2ZXYOxOS}0 z@T`R&JFs}jVvG0lqkC*Cg3CurWiS~1u{`-0OV`I`tN%%cLZb2w7#L=fUpYty(y=w!+ss*P2v-Ltxp{^FdS)83=iS(@Q zSrS^*;n<^xH#1Vy{dlIB9kB)-cRIx;3afasH6qG&8?XZo{A@~sqRJguk5+{B4 z|K5g>SCQS_USQwj)kKH5+!EWRjN$nHLkAt?KV_qv2PUbgp=G_^{rK(MPbN>U`WS+! 
z!#Ad_ZZhElc3-0>n5!kQt@+0gk|&jF{nXAoo>9o}vM!96y;(CuCKL+kbA=OU}VL|KZrva z#0rg^fU64OD8oL>_(pnP2Q@;e1``sFlPtuG=M^vC?jDcy>;8fF81E1M_kG~;AYK1mi8VkyV0#t z*q4#8LGci!_8;h_!Y^A12xs82wnRqLx{#d^VnMq7;mWm zxQ=5zGaIj08-UGU3E7KYPxu$7?DV|~PYC3j3^OQc)eW~KPK1mR=q<%Lz+?gRqmy?CZO zMu?8^vWy(}m1&^+aml||6K5@}H7K`1y^Z)Bq3I^(FpbzZ6Go@!`KzY+is~}qr&-yO<>4wE@kF@!U>642tA1sDcrg%qh zfw>ahppP=zpcD_jcPa~vstBbd82t!qf9W4WLFh}DTv%YmE`j)UdN(U#FJ+Z74)1t+yg1+y-qVNX|WAFqt>qP*|uaph=4UWfnT6AI3PC-IP*p9V`E`cBQ9`1}Z}J!1U!_$P}VYZS8&V5%yMpZTtpcaONq| zt<5V9ltgTL3%RAO>(@h{eqJ-9&$~6c#h&tUoOq`YQNBsRe1qotDw+Jp zS3W_+ed`qtni4lBHS!|D?C8r&DSIGB4=r{y{n$(lk8|X`w;WjgIE6#A1QO4KrNkB4 zywBsA<7FH(MjXmVC_dhY2!*#rRXnuY3#9;_Bm(I+?jBqqNYcwyZDRxT5yAz-Hfv}i zITDk{nl~H*pUg~Ip3@|s$g(8;oz_UPygVb3^fzs4`6E3f&H?NKDsv_F z!?(stLk_sIQTotY2j6dR&ZZ*6#*+7BKE!PY_?|TDxIY0qv3HTw!u1k@>P1RD7CV>Q zBiWNg<|L$xuimtbIY!kdp&!H&_yg6dsM;Ey%q0tV$zYn}u4V?>7W2EMuum~6xOrJx zUY_*A1o;AC2MqKYN;I8EBGEF4cfI$%{OQQ!qAd)Gq)hU3{(ulo)Tmzc=PmeG-FnB5 z#eZ%>uW>R^RM_>BHz3ODw$Y1GN5T7%caN)AeG1_@Vx{tXVGGuTfj| z&7zhcw1mu_nLI(k-oV*j;YlV&-WBqVxrN2GSIrH}_%< z^#{kkB*xvQx_=cVwIvX7{Sw`$RLoxVFUfndbh&A6iIf~ep;-qwTf_%fmZlZ38#mq! zR?U)d5-HN0o92meYQ}%}G13}h6!q#w8_}Uc;P{bOaaRK+bW*96Vk;qg5JLiuq9Q_T z`Fpz@aMQB3wtT$0pbLK`JtbL*JngibvTY^ah=){OD9#T)<0#K!P9Q@M{NBfu6A%8Z ziYHLbdys(+xJU-h9_XK;Dop)xN1xf$gF72%7WZmt8hHxkI4KlbLZ;0wAOLfMH8Apl5=C>zBPi_fKK{gAYD)G7 zywUzP@$xkNK3dAyn8<4uPwb_`=(nn4nTn>PtRC^Z#lV9)Pu)Y@#@QG2GaU`D4%KkG z)Z2or_)GJf%kZIIy7_(vJDk6M?=hEy9TgY}GE|%B^z@gc2h@#(sn}rfPvETLxy%s! 
zT3`A^M zUrUBx0F`-rsol^4+K(Ie77>|Ln(`lA5DDcQO^HsU;|Y+rwSESASZzS2pYd|cm`Uz# zO{qe?6+!v}kz5;|UBz#`M*f-fc*6%KJIpCYg06Q5SOT>QByOG*_n20hK`DQ+>Fanq zo_W!2?835X?R82TsJH#R-)!zoL996+@yZb(y<3!CJMEQIiZ?0592dtSlIMG?d}xJ~ zLS417XXIP^d>)6>X6c?qTorX?;V>+Lyg-wh|{$%297F`jj8iotwe*P18VdrUL@@=zY^p$WnI!!ykBj=H*KI8|sE?2Cp;% z8b`0jrSSf9q}ls*avyu6FKg!`!1y+1&Je)!Oxh>h~Q-6`6zZJep{i>d2SZ9@yV{g!a3g*gm?Fb3W-BC=g zyP)t3t+^KWdP!jGrioTj?hWm+g>+Fsx;~T_toG$d*LBZzB$?M$(yaIU+__i->qj7;9k}pUaV!K#>K4ii}I#RiFA;j?ye{E3sz`%1ZIUW@?*ap`Zh^O<)MBz!{hSj zsTSY~l59 zm=kIu@?&w@O6nEcmtqSJ5j8pOosYTLWCs8VJGms?6IDI6{Q5Z5!Ke@NA&ek+mi&0` zp}&GleU(FZ3|Woa7G)XiXR?AVZw?Lm(&)!_dXpnZe*#gkt*+vv__7bVNQ9lg#S)<_hWn5ZSh{Bg+^4Hn)haD?-W%0Ht^7H=!$Fkm`^xjoc{v8vN|Mi)Xpn5%emK0<5x(B$xb z@Sm&V0+L2)mwgv18zyB9w8j*tYj~GXFQRVTh_2|gV3EXM<)UB!f$ohw=ad0`n%(5L zUU)yY4&S#$;(T7fZ(nl0;s+RAXs+3Gt#{Ekrg`?>0YZC5paxotuLko8ztW4O()v1j zb;mKv8#|IN>wTOL2}_;^@CnDtT|FA@Y3dbY90`}}$uP^TcF>*w=OxhmN8pq49)3NU zjRUMplXIM>8t=b*09frAhoa&d+w6Oe; ztx{-vnz-2UA&=G1*I%?~9FxCFb;FMYWr{55fK$?g@VL%@gvdZVkh-YE`Q4r%B##ohyebB`KD;LeqElo_Y`t zhL5AXChdd=8lWH8ZKs7JHs)GmK9>;7bvEZIt3~+Map*1Y04z03m+YQrBH_gNf|=RE#j-sT^^u9(ZKr-SDj1vdlh zS|GKK>`2PyigDUR;$WN7T(`sS*BD2W9Yb`or&#d5()k zm2yzq92^x7D|o(YB+@$ul37=v0<93A)5|O`!)+zr3oXeE{EL$(4M524%^{fB+~ixC@UGu9J5-pboNifStXC)+`Sp! zSYiLBe5C7)=kFozEEX|=W|HKCkd*u@R~NzHCwabzofdG~pfZ;bWoTQaKh<>+zQcj> z0<&i6gClGU!Ms$5r(~GuPUsK%d%#uD+WX3*Hvd->L}$;VDb$+umrg8#x@M{IyQHQf z<;jDD7v(uW*%0ia)WRi#40>Q@vrkm(l^&&N(Mr7LjP&;f|2+qn z99plO!K(SJ_@)X!XHHi}zc>ElbNIlnYJAetKc~Ocv(_A^CTl3VX}tN$H%L1|tn2}X z#`)RpbQksm)$HM%WZBH`t%F4|LA)6geKo1w85*TpVdgmPg7c9B#1d}nnn=ERqep_@ zygrw@ooRhJ_4|;Y5}6^#2h>gMkudH0;<)iQ|B=(j(6*Z^lO$y-#lxJ-|h# z6<#8WG2hJhwR(|2l3XyvzGQs0Gw;u%o%P+G$AW(UH2AosBvr2|kMA|&vf3qbR)lp~ zJ)nx<^5#v|a1^)V5zo#j#=JB&>Rr&B3JJa+K1W%{o=X=O%1WsE5qPg%G{)}mIe0vQ zOl8YO2q{nhD9~TvIO@zA7eZZpF@hac5diD=kf^uV-KLOY#GfDk8`#UifjLS`M_Fw9+Ck1KpnQA`2a+av3^dzE+|aT8?N^E~8! 
zlxQDAARtGo@25lJoQcyb+A7v1H%8`pbg@}*wpXh+_jMoQd(ydrIC_9lt@!?Te0=Y& zDWXH{Kx$Fa>+MmUX%rRoE7?^+z<+qbBOjM0a)^mDep5F>sj zLwIv+kpLri!J4K4nxs)lW07~ezM*b9D&URafNo5)a$G?8*f23oB&)bLFyRhfl0IOQ z>>6etUS>eE+urG4KHa7NM$j@RILg&(&Yepuw}WDr9hLGop+etu-vK|S?eLRU@4->O-QrIMYxx_UG-I(yaLgV`^QQ- z>G&a9^*gn?|H_Hknn|Dh9TknBLU_%zGSvs)?3goBI|Olw(l3()7w4=wOABGv8C>Dr zp<0h`H!w@8mZhligsBJ|O&0mfsUA5HXdck{MymQbX*T6r+N z)*3sd>+6nWR^1k1cO8c(X|S+4usJPHeYW7#i;vS$7fe}5X2o|(Y0Q(}_Q1Gi zs<@)ic+y5Sw&W?D&UVeVu-z9?yPF2-3R8db#)?CR-v6tNdCSlQ+Nq2{v-*Yl^iX&v? z$KSpo%#6|9aSInu2&hR&-?X^?FdLsr8(Soc3Q2IDx~mB7Z1jSIJFJ+ z*NmXj!o-B^C$o_Tr-=5cQ!FM82Irzt=Q=K^lpazYLQqRP?r6-#i%@7H{M^&Kd7Ww> zd|-j>t(6i%IiII49WfKL8z`gl#dm3pIc2k8L7qX!)pWVLC8+@vTJdZ^i9G+0Q^i1E zz1Y-)O8BO)!$CYFvw2#4(xe_eJ!Bom z?u%!&b?c~-CV;^MCkn6~z`7_zStDRA>EgCZ{NJhu(DU89l(~#HksTVRh+*MJ4tp$| z=7`V+=FPWzp)Hd|up4oOz5G#B{3Z5q?6YRhD_zoSY*{W7Xiiq^6W=zf4gW5#Eii~| zaV`-H9v4@8DPBo0{>-+GY*%}H=Fxrt<2%`nuOZ$%(keq&KtXl>W`r2=HjC~e>W!(G zV-lnO$cv#+p;_=^sag&;<|B8f@%_z=FFxNykZBx8l}V0u&t(*17TB2QEIpN^J=Yi< zh>>uV9r(ch$I!`{^Pp1m^O6Uz4Mm_*_xH($-j_8H*$SWt9MAHxM;mI^G>MiCIIQazocuN(l5pza0+A@7Os*rT*^UQEYe_F2f zlXgwHqvFyF<#M{dG@3qw6;`V(%@wXM^qc>|n}5!s!ET+!-B*0EY0?RW2)Zw(aUP5= z=IY<>!QL*t`brWP!chJqeY_M!-)aV7@Y)V~>X1X_S*R1@UM0peU}&9@dVP7AN?K{T zXoODga$CF*2Id3z-%3pQ$f6bOX3mVx>U`^oG>7xI(#Sv5Es@nLVcO#WRL?Ga zb*5Xog&pr7P!etif`#8JvtV_F%I5r=jgN!vCD@T0w2A(RG*CZ*2gRx-jKJK?Iq8&& z0#jJvn(QuX4L~btDEnymg;KB!+3q4!tex~7BfU3By_tVJS2&(R;ZSDT=q68^BQ?G5 zkPL*Ii`RG=ZWXh~oc6C{mYqMGU(2CLU_t?6{2=);AUcgAqEMzpt_=7+2ZrTMiN zb6B7@?T5B*(5xIEY2u%CUGMk<>us?Wee-;p11DykW$l7tgWtk&w-N86)aW!I(buTY z@RBxu4yZV@)pK4-uArP&nxgaNA01i&BWNgE-}Yk9!4;(E`$R|Yh^|d4$am`S11_>B zR_LUmShoDvSs&D=Pb|q^;s?^CfhUgab~+y%;yok%Nbf4zFgB64Ad+2CsR=t*>(J~A z&yjnjo2e3+{~8n=J@C1%pLBGgs>iA0zm~t4Zm*d(?L=qbG+$DR{_{}XtBwf(xvIsq zsOi_0%0((|g~(d$4uVg5A z=)wM$W{aa9vpCGKRRpi<6+QPqVdFQ=>mb&Ro)2ATxu%DoZ)#yaS1w2xwWcnF-l5e~ zq=yx=Uh~9SzT0KrO6M&~t#br~dw%JT6~UmgO(W(ByFUswWPQ#NxzergU+X5#NbdWX zdl!y-aTNPPz2dsis2D{%*M&0MCeG{#y$DRkvHS(m+#zK*5ctpjr#9j(UG@0H|0x(06 
z4ctmtap}5UPf$9&5ea&*JH=TX-dk1FV{dqsH{RdPqt_<<#dIcht8aScWkciX^C4&9cXr=~gi7AFQ7jt2D6M{Xy471*b*K=w-0@fPY~!YzWT)d~tYnAoKmUD)lnd7D zi1W6r>-0(c2NqSb+Kte9c9lJz@`lB1$2YPXRpJZZXcq*2zJC- z9NJ=Ov!i9u%VY})YOx%(+<@w6gI_KEP|DA(f#zv_I5-M?z7Di`Ye3;D!?Qhw_R_+_bJB9rX^JN}0za``#Ec5=Y_<$~`X%$M56h?k z5`s?==eg+ehD6GUk=lee_r|-$N$s-kbn4CGZg8k%83TXIKClb`*a>iJ^c z&X&D1iy?lUn{EjY>{RL+Hjdub4*bX4i?U%!zvfsU`ugc>R47y~mbYOx3iuO-ryUMj z7wua43OUC6@d~MRA!Zk>Z}4xp8WDgk`@ul4O2l0nzLk2MFv07Ce-F>{!%AIQbBo*J zeFa96!@wLKM|3BA_HEdg^sBz1TomMHaA88HigoWiy92U*Nzyh;(VR74!Vgo>@|~as z+9w{k{0E|KEauZHiV^(pSA9_yQv5)&+j!%fqpDT4mYJ&!yM8}@EQ(LU(lqR7X zu^vJ1LUXBtTQwF=w&?Y48Co&55`-(XIL+^K)D>nnm!Z1t=CfDbRXFUG`&ym-l6!e} zQADnqdq4m&^eV6$ybApk*IAcu*Yl!=bY!p3I5PxqmVa9`_Sx2zmGQ}gDKLyI&u-Cb z!t=V(e!P}p^1#qXJ}@v7k#(0`ZU~mfcTQ&^8gL-BtR%(CJIeX5S{;wnnA0yAc6efW zvVeqglpkR2+n@eadrFit$a5jxksDRYIxM#$2aIR9wxRmUtuD z`$^T>i_WO_Zv#3ZVK)0H#!V7v2QAC*6)q1ka#%ZzZf}(WUm}mUwR2=R2&MJHS0Wr{ z>>ATNpKbcz3jwQAmb)HHd%&$T8aR8!p*0K@fPprXE8aW0yP@5`cXVBXXAOH~^!cT9 z1u~RvFVN0{rYMUnF<1C@7A>$=)+0OzfTmT-hTPZZ3Zei=HED<0#|E&Eza z1xhp=Ee^B(E18Fdmze%=f718+SS)_BwYPo#36})!fJ2s^(}_6gnCfk4kazNPcIO4? 
zB;x_GUW?Qh(<*5-g;Uj*##4L?jZbX47_W%bNZ&FZMmjFNs>wr01#dtot!OVo0m)v$ zi{9bT%)u^(|Jk)VEE|o4=SaRfT5jS>%`xB2Vs`3OIwc(Lc@03PQ9LRXx=0}}`SAnI zy%UhTU?ZY0wsN}02n)q*k^G9f>92($La-c2KyJ+K zf9oOdID$lEdHj(~Boq}k)u3w20>hJTl8PABm%g=vB-97DtxAEwp;l~ zM)U(PsY;XNyNfrb`&O1+ir`U#+x?WO#TW zT_M(sc#xUt8gOs0B<7r6vYQ_y@y%6Y~xvakes^ zq(i4I(Yd&IJ`Tf3L(?7Wt3Kq=cERb~XnHb8xd(0U{ESpd*~fxOEu&H}cl9z2XZ>ZaXZSDE zqp+F~UtsNCP+Df2KrTJoXq#~*-=JvO)+Ph*iB4+A-_>)SfZdH}z~Xb_Mjd;G8Mh~; z)AC9MZQDZ7w=a$V1#UuYicH_D0mEB>sC#0rr;z&xFQLbSrf_=6OY4LPXMrm1#e@`2 zGfSj@z&lZkbp~yQm$>UW71)$`W+_Y`xGXA==j?A{KW1=s@=-R-ga`?&Mw6>kMZpI| z_O6c*-N_t$TZl?%r+Q5orYH4*8#&}61+L`pknaRVW<^^%D9)XwkBZreMU!q1;q8gr z1hWroep~t>rEnD)`5S!BV-Iz_sC>AO`Z;rc2o2F5@QY9W_~1hy9J)F2_7Jms=nL_+ z5#^MHv)mQF##?12?uHZmVoU<`4?1ne=GBy1(@NS?|*t74Z_UARCzu4L9699@bnYe0a= z5Wc&ElS;qW!rAZ_cD7{HCz0lAc>ITcObqR|o6oL}#k zjMvSi%Fjc^80$iF=ry@l^oIvxvaUxHl}3OV<96yuri2dDLnG>h2~Hh~($3%|&{<1i zy*z$y<8~^6KCnD6ymII}bqRJ}s>V^$ z`~CU2j(`rwRq-IDn;L%$@jAosu``JS5b)P?K9Te#edM07R4R#blm@zHKOzx6O#ixa zb%fUVm1;eDX*T^hV>owEIqg|5)yi_vB-J~O`)B?c<-LuSNEVpe(GDnSAoBT6bv#P54&xhUt zTl@Qk=egvg=YZGYj$*>G-X)VOKOM#*7!M*qe+l@_%zR1uSQrv%jza)0V6Bn9XYLq zV@mjvAmN=_3fi*EsXO0L_z0d;Mu5{CT-kBv%RXT_>E&&yUuVEKcd$Et!^Po@f<>S)(0xB^RwMgN!>nC!-8XAwj(rOG+K;n zuYpg$HrTW*@1IvRGul(H!sQxlN~`Y)^hKnq<|KIxj)O?53Clk3>2A~EdC|9d-Cdx6 zar)g+bjlG>Rk9mU@R)uG#fsp>E``GnVkbx!2q93Wb*fCX&znSQTs-UHRUZ|=)ZByx zLq%AHtk=KWiUoNCQcd&k-BC81CEmYV|G>Ifv8pnOOs*_!+WynwzMdm+!p{${E_K$r zkH%LSR`}Glu4xP;lS4j)vC%DMZ_w$KFU8v#J^Cf*Nya#7ao*4{W~@ULwfi=_22>T* zKbAtJPq&7D&4Q-KzhI-2n+I$7B|93e6bY?vjHv-{4TVB9mLl7OZ{Eg~{>0h}%V51j z2#u$l&(L?}L9+Gj7jrkD$b7kKGwne63(K|M!I+}(YRI+nhKZN01shne6x*4E#F)^9 zf$S>F5$9++(;20eJ;E>CE0#9Z{@)0PYBBUv@2ygy)4+F_6gSE@)CU*$_*|+3=vO~9 z)B%q&4R-gZBmt0cXUp8P2|^e2%D52@28AN_ndHV<5psjo37PtCX<;DFpL8Nzqgm{1 zsQ#)4%2?jlkbNVF$fjsZxk0B5#;O7Mz{8@fuidzy5@USMi9=Y+eOQ_mE=xEq^ zj#nn?i_EXEE6{;&V*i_+;1zGjWh_crIexIJsLje|lvG__LvuTI!qN+Q%!O17L$^~@ zVIjcI{@=wfZf%&KeXu!ejNqZF%N$yOW)SY8nPUxYK+(uG{U2JKir;I&D@WAZk8pA> zfn7&uX}MGKF1FfWjqUG%C)b|=kH7W)U-fB 
z__#cN{@h+*gF1NlrKD)@1*l7YYr<$Ja(5{e?R@_lY6dM73xlFIu?psv<)A$-V{*(u zjfToSVTI22Xh|jO9+kdj7=od!i4Ww|$tA6(L60bAh>Ibxexse>FdnI*>qK>oEwJlh z97`G{a#*_db`Yz3bd;a~%SKSjk=^nTD}+#OfW3RF`w%9T$)s^60X(|OTeBPCZ0>S>tW881Wzo&>kmObVxHjfShRi%CR2i-PZ*qd4rMcRA8 zO-Q3IgQlWpZo}KMkf@PhpHhSrDx@W}nZ|;{2ca~iu$gGjtyhJ;zekZd-x~R~71}q} z0I9G6%&-XDnJi=n@9akal~m92cg>X%%zmghU9n^fef0MS5G1TwV?e^FaL*47$LfuW zQ?yn)CBhDSlRu^$%PFql6TWt&cAFuQby2dT)vZopaP*)DrX_o)nbsNpS7YFS78T;cckuVWUAnC};vG{893%brB8it;_M>e>H&goSg4Rocl3{~*82YLD^SQqX_A@<1uoQC@pwB0mDeA#LULG3uA# zl6}=cP$LIV;kM7>XG0HmtEm4M6L1x18La0VgqFgkq(PmZJ#v=PmI$NI zNX+X~WqeRDGCLz?m*%4W+V|AkoW8!R)fjbM>%D@4u&k)jHVdNg z_l)I0>1Pj)ZLgvBN&m#^)eMdV@qUQ+OJS7rud}|XN)S|WzGjiJ-mb}teo>C9jEFTMqPsjJL`brP2bo{&8i6Kx-L)1^?a4W+y<1Ata$y9CG?;l_Vxo-P# z9^s=@M6WNApqw{1JYhWS%uxkC3s6f+ak&#x;`ibzyAe?_J<81}M08#wL=(h`D)cKp zRDypX+J-O&kN@NCv)qvUKG_$~I6)4iJoKNHpr=%|=A)E_ur~&lA)n7S+z&CGY5!b^ zuK2Bp&3P^sW;c^k=NavB6QW#@GiojxF8^EYmf{3?ywrjdq%Z&Y1>mmr9I2g~K)<|k z0gi-xib{t)2;2AnK}0@pPYMRbK=5aTR9HO@4Ihxv)c~wgmC|YYryzKs0ou7Tv`@Ms z2jxgb<_5QZc8V4aCSJnJ;!WNtZ_g!%A)EK5wnG=4M0KjuY?P<=c~A2|D|@?PcUFTR zbHO9I)Zjn_!-*VQEM8Z_3bqV_%JxtO=0}{Sj3g+m`pU<4 zRJ@~)1#eK({i#y9F7&Mwh5doH#fv67jILBd&n4y@?kw~jX<=a>wO@gI>=S~Ej49^r z1>fU*ARYhPE&E%uafuk`MOQp-AXK|>Sz*YEt2PypbJAv0Au_7y4G&Z*@Eu8n87Cd> zO}A;VX6|zT`fIXuB3D0WBSmG++sR;b2rnn$Q#gmA0w;R;qq~Iq7<%Xh3Xi3^8 zf_T4*!E;u)>Ly80Z`NyxWRG}L*6Z>dk9eN)59p+3oaXyd5i^`_g;$~qHoYVuuc2|j zr~FV}l612#dH&aE%UF#{SKzMi!pVE8X3m~1zx!?tcrB7_cMrPvX1&F@5n@;6=DC8U zacovw-0zO^?dMh*gy}Xx459C+Vo5C0i7&~A1f@|f17(qb|7ucu+{wSno$Dhugy@}B zPYZ_b30{iU>t{(#H%YUz8Qwv*WCFfk#<6^9&P%$4P!1*vAe2Wo)hL2rKQ$edb+i=1 z&A4+VLB4$*F)jBe+6e2x3@EMi3pdUc?%89?Cvj|aXk*3YhPPpEw?Ak?7Er zXVlkH=gLDH;sP^*?=-)IOWqzy#7t(TQ+9aX;z9^61^M&yT7ay9uDDEs1YyUJa&)fu zy`~<`VClP;YC#&jq3d%1p{SPS`Ex6LS=p-(U9meN#1INd()BxoEJsfk_c%mxc_8!j zi_$3#n{2=Tg`#0Am0YJOnOw%gv(Ls($2YW4&jp-zJ*Yh}&N+@27?|r~iR*5xbK041 zDA$|I9?cTc$%V&0aQobD3^GvLeFW1G1QLyeV8bCaQ= zM=0|wsN>ov_Dkg$67f09l^PH?o!+!MqrQ880X}1UGYUg&YN63s^}wv98imMQHInz2 
zy^oh82K?Pm;vEnB6V}k!(A;VJBdlAJ=qx$~7mqC`j)c&ExUj|~6(R}pRPY6zYQbQ_ zICdAQ@!F!%HUVN#(#@#wE_kI76WZ^Bt`^~p>1C$je@`)LZ*A_boi~oVb5)@8gV%}m z!T3$y2oM*3j!AWcA~Y^#oEk+6!ghGZ$FezAT?%D#c|+F)M-ccq&zu+Po(sfOIKrH` z`EelR3WU_ac~CjM;icY7pknV%x)jVcDGS)QN08^k@(kfyL+LoS1S7j9~6MLoH5EJ z_SkjC6pZ{@{-CC$wZZyeE}`-qV_%$@+{pyi<`|+Q2l$Dj&))Bv=*H??-jx|L3JHvjpuOdA*5TXpNLq0FTqfoydjfW=5SCKSw#o42kcZk1>CqqUI$* z(!R@M&oH3+-E&9u9I}FKJxE%&EGAus5UZ!{PInW2xALMNPPeF<&V}=6D6O zC8CL0-iA#J9HK(7$fYBu1I~-V>p5RTz2Q=wNssb?4O!n`NN7^DOO(+B%^uC{vjk=a z7T|)A1mu=~j9R~oxR}Sx_m0~86|oLQZzdwI`Y?^MRp~jBCXr&enyus?bu2bgWDfx9 zs7a43enQ35KR6=)^IioAoPoqyt*FtG5KRSMOJvWm^McjpK@&@+@@Wg_I-Pj(+xiM( zAIx~>hubAaj5(>i_P(pp{-G|K9~%-P#kcC9`z3FlD244Npw-8J=94QlbR=w=MJm^V zXOox`MVztx&sDVkfIuZ?g?JOmv~RvMi?;Rwav|o9+jx)RoNdTcf;_#>C44EChj=~U zugnK!U9`w8iNWG2fQlm`I+hrc<{1Ttf?nori&TqEedCMzUu1eI~lQvKC=><^DG48F|_ z?T8C$Ly1J>Y<=^$bZ8lgAHjb1EDB@)x|Yjne8dT`K58%h0VQO)-jB>YujeL(CBu+T z^|>1sb9Qn^H>C|t+3!vmR^1MZ|J=)7I zD|KKvmegM{sMNvr_DD(f04F8;t1b^MBlNho(i7hWRW&$=F;ET2w@WjS>|#gVE^yN3 zrI5KOO?w|5qEar|`{&EN7Z4kR{qUOe_E2!_c~E%5alfT1Z9T(F%xzX;)N2-{YnCbo zXUf)*5KJpvzz-Z2Z=AV4&>V}fUQj)FQZujO zk=kN2!lpdy31juR9Iz?iHF2`)vclOj^RWZAyoN#bxUcIFT@qHZ zW&(cn%k~w+3|V#X&F}Zh#Lx8Xu}RWhM9$$m$y3dCpMgiU4+LOnY0-F5bn2AfYXWQV zxU|i&mIXca;tt?vu>rk zgrqReb8Ssci|Pt{OsNXZ?1Q-v0thqFB__OYL1|A+XWFX%aVL%6bTfDVxLo=&DJB8O zUuWpPk58!yX(62&zvC@9vgsD@`>-yQo($`#Zm@bQ_dy@{H;khEn@q3`aiN!H-)n@z zwOFr(+MYe@Z!TUEizur@Yh>s>u>OXKPA|mwqQy5|+?SzY+Pbb|4(M!}#x9eaQqLE6 z0Pv*kQ4_^?s+FEfE9_quMx+&-ylNrWDV~MF2cyC~(*Sj(_?#;IMPffJXGq}MQ~WM$HE z6XM`h_RR0~lVbQl&CF_1_oTPAU%uhUq6V(G#}SEjT^`>#pmlx;rJoIuw#%nu8t`~2 z^LDK=ezL5c8o|D`nok@OL!n+vCr(LbqGOmjQA*lXv2n@eoB^F-u}yAS8m%i$%(E^m zk~GV+Z0iDbDL+rjEYxVaLSP=Em=mOw)CpHVarX#EiuRqDt@ZY{QY&34$bP$}_x<%s z2`CYCMjE||$d0Ht+b}diL88wo7xs|?^nXEmgz)=Hta$k?e!N1^uTL<^#J^+m)@lTY zGBXWVb?Ecf{ZQg2)lAB7Am4o7bW?{A%V2JP-#WCCa6^xU2_$;`K#{J#XwvZun|K+M zbcNV&ryZb*2fb(m!NbQ&eR-&;6PO@SV>YI#EX$^z5EP=foi?dE4=>LY+@<@^|BC0W z%m+^}C~Xh4O)7kV_YcxIF>+|H@s&&BeRf?nbssba{Yj5!tfl{yJ)=zU_653m3W8n9 
zni=n*cga(fYO@$%WC%rQ59CATboiB& z`725Fpr{CKLgg@n@Fc@w1`hVWU9&gR9SGRFSc%tj4s1Cf&4e8h#!+b1Vca*E2waLZ zsWf(St{wFaDdI@s&7=Nczg;=2c|#bgi1q8K<#cVu+53X@pZ1K(bFGO2)vpzDGX=(K z)cVJZzr3tD-_Y6jjx7z3rZz#keKwb zf&J?9U`^Karc+8}%P+D4--&xeBY)D%UpsZSVUR3G^Ti*j&rI){ysNUXB|ftxlDI!V z_qKgmF@KbIW5TPnI=36I%XRWj$T!Pi)8q7*-jdf^=%YwswW6VAe+kL-mdV3Yo!;@+ zo1;Bx=zjx9tH-gXO9~(Cy5peTl7kJGHf49>-kqb--2q9#!%yuO;?78}agLA3U(=g+1YmpkQG|MN&5!E2FXA&yyiPqk-0{ScJW9?RNi$-r$HG ztGQZz^IJlBZSegC61y7bi@*Uj`Yi%WJyyJ6kw#KBN(fYvR7`M357ClH)cN8#Rgs=YX86Gz-#eOBs94t-#KXn2@H z9C>Mvtv7N_A3dn;LZKfX!rWn_BxG%7SLEk8!y`L%3w7NPy5j6+bO^}~?T#mhJI1kP zOz0yN1jc^$sI0iAP*SWFb1b(^MS+P8)3y#Q#v+y&Ac}Qp z$DYolm4=VvW|a1nNr{R<;Pu14ehdA`M8VGM=1o823%vvAC{A-U>zz8We_I>MARv(n zBn0Zuohlna{`}}BOxqn~uzH9HDi&362tFMioKqrWEn3u$0`va07n7;sa;a~!f)3rZ zq%(oJV+wRpQ(XpRHqLLfVMPQCu*DA|Op|_B{wLiTWdl;tL#+}q8!(ohd1A`Wj#5l? zuXzuppK=wi7>^$8ZJchApu03_1(o)nH$aA(f+~Er%F%T)q^l^u?&I6N09&mSB!Gs#L}en zc(%(-N`pAdsTnOGwb@o9R*0s0vM}TAB?v`pt*nHdelaNSlt9w!xZLB3_U$}AGy1j? zLr-eoMcMA{T1};jHu&RgvOd8-H6myhkmX6OdD5*5;28Wnop{wKW!3>>Af#76UZ5=v zXZoMDU>v`&A|y@Ro?m>x?imGIFit=;H9%= zX+YHIl+X#BViX|J&Eii$xuFUcRwY)+G?%nCDoFAd>_EKTjFj863%~w2;fwAQzkkE^ zvbKee>h%Df=Gi5?F_q!>oeQets+wL9}fI_4HibL4{E$CZXv`Y4<}IV_?i^lORwK z?ua}x`CNB`l4@d~3AbBdT;mhfflxorZ?^^A#wW*AXm$gzW`(ON+=R8vAe<3ud$%cM zAj`{U3Gjp+svf_$a<3mfP#lFaOEU{Sp5gS5brTui^velJECZiF57?%M{kw@HMh`^L zL*B>Sze1XtbHoUydcx$fX(8qDa9siiiF!AFUIDG^7O-1-3*{xLyL^x6w(y*poB0SK zjx=G_@9wm-ch6QODHEgCIygjyUPvn(JrNPMa2?L#rYtacyH_MB&; z2o@|f5miwi410}!l2i9F+Rq)1nW<@^o^Q5$W-W8^8M^j^Gb_#0R`eJ31&>qx>pQxp zLyO=E!Jc5HE+2Cqk~*3Xl?Sx>!E;lrQf-0&#a2&ol<&*Bx9HuEUL(|_SmJ6OPjkS| z2JdrtKAtD`dDOA~fjAll82D~{*O+YyMHU!mrCgbCQYFBJGwUapp}C)n6wAkdJlI$9NZX~+B<6mjIGqu zm;a_DW3d8-fPdRBrT1U7=|NnMN+dMJftwHx^#(}t#1RfMb5pq&y|(pUtmYi8hf$)$oUG3^_a}@VSf62PU8=H z9JH;bQdO5dx~CKbeg-K$vPjj@28dm-L<~Na{5~o^e*KX$MD>ti<( zlo)%)=~ntBZMK=8O3@9I4h}Y|*O%2mqm!k<+b{PsEZ!O-8&0l*i5rcoZTcri1G?8> z=l@D(AK535m<^`2oKNDRcsT4oX4X0?$Bl0>XTQPz(c198tZa+Bjy+kH=_9AWDpZi( zqZjH`S4rH{IK3z1_e^xvBpnU_7&C8;dE 
zGu9&2HPel5APRNA-i_t028P^>%xaBJ@rkoy@~ErMB4d-?*p6jG?}Ioh&x04Z2?fSM zIJZv>@n*^Vq}K##$!U@v(*eC$;%+wLyX%7MbR*ojqK2!_h)#*V?PsT`jo|Br$O>L< zgHE^lE`|1*h_5qH2*FH^ciHGDAV+goiIK_rg~Tlb6V{l9L8b?m_?iwID*C&skDP7_ zj*wCjduNe|w|nRDx;WP_#8Cz~Mfp$|L3Px3KFU2WG=%K@BAEw=#XdY^;9WNrW_&Rw^TdX3mOG4joVH9$OE_unuE+;VN%?Q0gUp4}nDM=iaBwmerTR-^~@ z4xu}DgMYKBWLE|zjvDacF*}olZG+a91T~Ppcc6b5H#S1W2UhjaXqTY$k29I{GPG9F zAo+lQK2WF5w6t$S1Km>*%UU9+Tk>ny%psQbW`ymuv}up4DKv;)|Lh-Abp*nfsa3i{ zHb~A#WB^5)shx2SH4AGbT0N+oWl4!TjAW0X7a0ESK~_!9-@Vf$ZTxWc;^k7DQTPsu zfqZ|DG^oe{=uSD2H&;9Lm5N-8p-dfktL4-SO}heH51Ga~B%%89{>yH^#1MFfyP5t( zW_dmr70St<`Ui$Kapk({1xCu^oNVX~4;N$YsxVObnE0b7zJw5lzMq)mD(p~UMN zD51=N+Nkafuo@MwDk^#(u<>eKqLx^@UXqT9%>J^^mB}J9;C4r?PDI%891p;ehDT7+ zs-%#FONP@!0lU0$UsyUq%BT=u#v8ApIioo!!Dk!%wAvu{0QQ5lZ!INZKI!ozG-OVC`Q=-7@3p+K;B&2dzW4NJh7k?6#L*u{yZ=)Oc2 zyDs`GrsPm|?Ct9V6uR7JSwmpr3JfS;vK&^sTct$KZt zkN$+|{2$Qgbb&A!Hu7CgZ;>njou4jEWWr9cp8X&wQ(Rkf{YM1ya$EjlsEMTazXp1J zrXm7@(;R=aV|r_u-QNQ)5f|8Pif4q}{2w%`&y3x;-S1h_iKm`@Xzq;v$D^AA+qvtV z>BufS;rMl)#yw%@yCMW#=_5lG{krAT69C?0k!)uf{}dc|UIlBz&FX@C6=a-Ef`TdJ zFCHZ0YQ_e+IRH-P2T)sNA9-rcQImjhd%c2Pwx?}E!9yJV2T+B78uTdPz>gvAQ2D-~ zbmO-YK3Ee%gxHob=F9Jowqki{U-D@qkY$tHklwX^Jd~oXO9e_4s~fV!nx$uB>Wj1{ z*#FYXwfw`f1(Q5tjBclnv4i$vU)@j83F5B1o(}*zp*RnFU0dR(&uA<~t-@hrt%M<}V47}iZGBmq@6kXF8vRn+GaK8vU1R`ByC zY|1C12jSk8GSI7$DJ#oL0Xt&h!@sfTeDT<)>9i(9P zy1+XPdVyEXL=t8a>s%zKZ;tKGC6Fsh-JBRS({8Km1eM9}NWX9xY2HReNeu{GxtWrU zqp>rC$B57pXTK|{XoJV^O8P?OwL`|b=?1*BYERV6tvY$jUa);b8P99a8vZ#BcSRY( zy2hy1)3o|;(9!yr6?JDoA_|-5UyJb1{QldX@Jr5i+YL7Dg?e1Xm&!4s*Q_I<(`IrK zs(~BOVmg=t7wZIXUK52r7V>HXbA5u82NLpdvkt$B02XN$E0(VPdaD`D;KA1yh#`-*zZ z+iJuTY&uNqCDCw-Gd*HEALwt8xiv1XU# z@TCIXr(1==BhlH@g;3IFl+C{1mMHGe1)MX}SU8I7-zd#C7o%;mM|wHbR+BZp9UZl& zuU~9NzvQ(zXpY6H((Q`QIRex()YF(H>ktpiOhF6rSyACs)qbX)B5Z%F=o;gHsnmOR zf!WQY+fw$0V!gT?|N0%wp37Hu>Yh+=-STjQmp%5{HtGqdub{QgA34^x%T6Q#P&XTU z<}`700jZb4;1brbc?tl*WD%=q?%uz#E{BtvBhe8gy!~jAg*X3}-6wMb(nCWcndLIi zY`&1*NB~;AlS!N`B1BdBR()y>AQ^ZKRFe8Ac52~XC;cR?ZfA0l$~Ap+P<6xRAjUki 
zygStuY43jMe*$VF03lIaiWMBmsU*5+XM1KPj5t{uV)+qNu-h5orDM(#_ubG4Q2m@d zJ+|K!v`v3Pot5Q`%W< zc+0#W$a=l61BImjLB-j4-^?ycH@fxOUb279rv51F{Q(S+%DlGCD%7#uT70n%^nc;j zOKJs1z1sRdN*o-|A%R;w0{^}=oWp6E6>Wbf)|*O}Yu3Qrh?vO67?{uu3Z1UwDUx&2 zOZ~1TXh;yPT}gttFjESI)>poL8k{KlHYyUvhZoO^5kfeu35QL>-z%>|9B`rqe38bh zI*;d7v%wGJQdyxZpToDTd>_3WYOrl*IlbOBBg13#HE%~P@3wV=Wo9CdJ_kipZPsvUk5;hvjjb|m9O_*)I2ly_kboLq^K@6fKh%? z^@nl`3g9AH#k!~NiHPmzyYqHez@%53os9RJFf?4cWe=3GAC)4eSP&^$o{kzyclYEi zpU$7GC5^(p&wDrdr_`{@ejYlVejCWD_;g7(bre}WvN_&A+Co>HqUV9^^>`%+hs;c| zdL#{(-}3!O1sdrV;S&zAJySVeO)s*xnJpw5i5gjO#Tv@RlRA8H)sRvXwtm_0t&CN&lWXor5$#ah! z=3sOF{rNp3P7ozdN%MD;^&rRk*I=0S7sO$|T zWVs371#qu9RLMp+c?yFmo_0^uq9r!_M+xr64?cpCT2JYKnRpA{6e_c-#KVNBW#oQj z3`edEixVZN4ONZF;ydRy0}2nOGX`BG?muazocOT;kgEJGs!k>C+bY@_uB!}jfLX)ahiDwmdm?&OJr`$>y;X~syV7r zV*H(+3LR_dbsvlDl15P_M4(yWF3DaAV&!}8P@js=5*wmZV`==<>lQHe%!*PElTrv2 zpj{Dl)eL-x0hyV z_zy1^2Zb$S6=cOPEoJiDhvTBiJ1f*+?CJhuga{SnvW4o~T(f$PEGwQKC|tpfFm8F) zGudI30%Hh{1ZK|yqHhIf;Yw+=+1m}(=#Z?R?O~>Tb zZKa?4w1BJ!AvyOm!73C%d}pBi0@cP+{B)nZt_npU^|p5hsT=d+(*phhWkhp+_m%Dm zS!=@kSsFt=Ssr~GA)h6uZwHKGeYO1j(7JGb!wGxF@L-q6t;+J*uwR>AzfkId*%a`( zru;A*Dp?IvsIeeP>5$I3Db>dsrMbfH`7Ki%?1jtF50Bv)(q@ z-8iD-nPJS`4X1CbekCu@h`Pdmj^}Jt{hcLrcqt9Xj|O_qmz1|8DU_Lz%X;{VSFBJ} zTXjekX@9HosjT;){~qccnfTEyjy2b8WmfiTU7>^7Byg-CXr^wOd zpZnJJTA&b=TX9*Qim`d6n;_1x!F0kn9}$gGOj4cN5Z-&JO8bh6WBJikeJ|U!_`&x; zpnGj~iFr2q(xAPfdusM5UpM2IPaVAl^Ob8jAjt-vFwD;+x!U(=DHdBaBOehS4rFER}B@%g2{Hv8NM2;1pv30!>0wn1%|Tm8Hs1$*z|E9a*hW&FwguBrZ{L$dpPoDM zYYsTDX%^js%D<~bA<>^QT|XZhxW)}mkXD>I^o43`j;HS!zfhK9{tv^g3uQ;@fc@|X zNZp>ld0Llu(Ku<}o)*M2r&|qq@8zw4WxlP^`CdK8Bq?8|k@{zw!Je@$gOjD+bPRv{^8lEsj#im)?z8ObN#@U$udfi!|4?{ zw6EevRs1=r+(N{(Ue>K2rQ}WTF{}0El5>&LOox+BunNz5^$~QKn4N0@J2-S5E(QEF zxe!z^intHLcq3*&cDI`r=x;s^vici$KfCQ$cK5RQ#VlF6qs#KXjAJ!_bsA!u!Fkp8 zzpYbdV0Z1PLX!-CFsoTr5EL(ygpRZLQ6oKb74AW`4QU@BtjTG?5oNVhbF497TEGl#+@&6MnO;Tx@3S&o?*0U^ykl9I;2JuxYyz;P79&exf=l!q>? 
zqidEckUtkigf@5!GoDu}Wk`T{{^{5p>7xi9co4j;6-Z19Uts-eY6*p&2CC{;oKZ(R zR?2yyyiUqpI)D$-{rctBib2h&7fr2mm#uFm0>564Tb37#W=<_??380Ie zY2wDCNhR#~;tthto-DjUnflxz6TC;ENmHrdq`*wUHm1&-${VW7dQGp~;51TqB2>2Q z9Kn5@r7*zB`U36sDCER) zQ#g(GUvk5u=%x7BLNL!W7H!px=mfIO6PpNxD|w*U=IW~eN+|n1HaYPp^1VQe#-DYQ zQie|coa3L;oI=FQCF3!k3tgAX9;t?=iXLE?u0I}7%{mAP&Pks;85d7QWP*0hgQRze z>r1i3Q?SaIx*PxPPD4_0m~G!?k4tDT-uo@}E2I|Cc+7 zyJLI2cMjIh?&gbq!ams8UALjT6QBby#7V>+h;o@MVx&#|+wWUUEAFShro1PDO-bX+ zHk9?L`mw4iZwk`i5F-YzMgNbjuMCT-i`u3`5Ew>EkQ_h}ML@blh8R-1LqeoWIu2kk zBPHE~0*Z8()Sx2`BZ4#x4kg`P-}pSw`@P?f@5h|$nmYTOefGZhTI;^o+CllmEHWcF zqxJLa%UOU$+~_r>{+EMAEpJ=xGOlMBUp?Guad5DLs0UFyk$RU(zh&a?Vu`)CDoOi& zx}A(ONnkD0$DI(bj3UP!$%G+Ri86VK>KN%?&cvT?=@I}PVM|h^>1`2J_YPH;gt*+_ z*KH=H*+c{P;hVli%4thb->hJjLo9Uw8D(C9ts>yM5i#!4GXFwUj-FCF>9ilCN5?Y` z=>KOe`byCs3*=!r@c~qIp-+tX-3ixYMdS92*N<9SNbQx_5Z;d;hag5_=CN(4HdH=( zta+VWHq{Fcpj*zyPqU0~^kA#%s_V%=spDNNO(GgT_jo7qIeAy%748)Rnn%)xfQH2O ziAL<Q8xAPWm!Zl_ZfSP_dEUalEFAP3#NRpzm+5< zH@bcQGzq85cPH;N%t7rRnA{j9kl{-DCXgkLeAB8bCnu6+sxWV{M|?!A!E(|0ez~5T z<*l=5S=~z$Bj^@E2|lS#A1gHjB&~6uW|OT?&mGSQB4Uy@NsW3XQ}L80KN?V_fnu9m z4hD^(E+#@eZixcr;5uS(M6UAwRJ!Ch$>^D($-FwGCwV=gw~VLJ%Ul`JvDxVJSHzzs3*Lrco&&@I_W^Ig5>6W;Gdl0= zV(;joSw#!)`r*@-$Q-uHzadbvXZVn524|dz7yscim_Ol+)hntBydAWOM-sCL49{sJ zy!nq737{SVl2t&8O&S+geLZ@%`{f_=^0y0~2Co0PHy-I-0d9@Hr&+Ip53s7+AUuSW zzt~@-2Uq&y?H`80?~Ko9J>FhfF`9_}0Ep;$Ui^irXCjp2DaXEJdzAxgg$w%0t^ooA z_reF0jCCKcpDf&Um0{lH1ZHS2{rfYu@Mlk|eiMi^$A2|M-WAh!<6q^#ljTs1!)h3FjPy#HO?ayIRJV2JF+fZMNxob0`pfhB-_ z{TwU*-13e=LyCkbr_bJ6#Ou}Levcng<8UVfo>II_?)4}cen8-JM{`8qb*Rf}#vcw_ zle-fiat6y!8vJPsd+5m&#NNub;!NEw=_kq(jm#;NE#p!H9~tax8q!`2%^vEb=s_3p zgjOX!aX}f3lI$BhmM<^kV$*%7l>tn_74K%oKIGU_z)upId@mwTO4_UHdKnf)-z?s^G6m5A8#}tqg5{{bUH10x&UTBI;JA z6n7BJtn~LMiDU+=MVJxRum6TjJ6Sg_;6l$eBu!b!q>Vy3h3|ddM}h9gHC7Pk60F zCdreGG*LaBcDKFqmv=k?eV*zS=vP6mdNmMSpRzP|MzO%0+;sFgKz=N#%*i{aweB00 zGbHyhE0_7g<8kB~9{S>jT~$8hVVQX%@g+^dcBGe?qv&B{#K@Apnm}P9-nC_iYzg`@ zr;2OmNv>q_Nl!!;aVVV#+O_i#mgp1O{J8$#Hz%az 
zl)0Mh;UwR{mSoWhzW}oR)l&M9)*~Wm1=p|Be;b~cjP<xzpE9B=bnIM&1jB=Oh z`g(6Oi9k)!p8z{tr8D~{`}jvw!RQP#DeRKP3%A;F>SNkNsqx#xA6!OVxPPs^GVXy%_n>Axuiz_=YZuP$gG;l}Xuqbsl6dz}#o{=Q5C> z-sU+YqtfyM73!#aIb+4dH7Re43p6IXWKO@Bsp+=g4BVvjtGx+4#@RDYapwz{q}J-b zc^_IL@uN;O+Vl_2pX_tu2R?^Cf*iQ)AKb`8PU_JIe>kGc%esld~JtVSAOEA6lP{%=}+rm&?gqH zHD;H7fM6ZrZ0%myhOaJykjSjY@1$k@wR}-s5wy1Bfb+!dMdF31T>MF$@GubM)9HhW z0ZSD^;Nj_u_bx70V%E3*gmwpc1Yf6gXt+skxBN^e`Y^Ztj$|s{)W1@Oz^A$=e>->o zS%2}?+?c3r{;L0v4^zEW#@{DdE7Ptdh8qQDamoAS;>v-V90V14K~;9)Gw<3~i&5Tx zES-JrFB^%Ab!A9fvF)!O`bT}p%-OQpk}^U7JYd#nd6eRF%+3oNH|I4f|MzC``9FN7 zCeRyHvu`g-bNMfZuxHQacnk^ynS0MnNm?bYudm90i^VKx<>D_{FYX`mxK~_;y|@G( z_PU^148li4kdL;amWdTq6_!>0uwic=3yaWy0UzJEps7a>sAqduGWE7be;^*emea@a z!RO>jf!7xjGujPt3zkUlz%kWN4osXpza)mlfJNCz|HNG|ZdYpgu~-8>?r+=e>gH_k z7kX#=npfa{j@rv6Q{V3nc5Gj+M!vBC%KxhB25@xy;dDOJDt6k{fz=Ft*>@jG9%^?s z>e$22fX?yU4{0ZP%+LB2*wb4(sv3kl;{quLzgBggCw3+Le@Dm zz%2T@V*tWK)(3Y~*B0xn(KOwi!>CZ3f|twoevik@k9`wFL3xnF;LarStfv6hPlv8+ zE0*~_j7}-xZ#BIV)#MVul6Dv6{IeC=RU3(UgrSwy!qERrl!{zV6V%E z0&ijRY)hrCU1t1Sf643ZdxgUqbC0h1&)W|wj!sN#ksY<>va`5e0ZOA zm*v52@0N~NV~jwq-TDBoYkb*2_x1i=0z00iEvQRgR{}7Q{GMMaL8`-13@_JM&(B(P zj`)ZP!paPvuviP$5g z3WWw9=tzuO6fWWAIG*S4qA`-j5q*C4X|3ok=qN*z56O-FSH8+CrLrumbt7$*3ihd-KP4DpwzE^*XVy zNj0t3O&e*g31u;vJ!GQ0Wa$(W!por4QPksg4EpRqZvL9Jl?zPHl+3chcC@a#RMcdw zbJI>xB%<3T_7jI$RP+GriI(=BUriu7J8Svj7^gj(H(hVwIlfTkY#g5cA_ zoq6}JzV6vf?HRsy!Sd`gd?+sRbTFzxSI%+teC*E3OJ`qqhlh<8aG~2$cic

>dWIP zVdmmoU?deSJe54{-k*{N3LffL4vG8exocj6&n5TD)_V|s%KmJN3u{{(R8;Jx1H>rm z=?a-osr2W3-+!OcDMw|;L_Vv(oIXnSoZov~@a=b9FW2W`5}TCx;wIug^hb{~{J-i6 z%5WV`1$oUQ@k(oai2>zp99|&|?k9y8O&;BFW=r%F|6@#$r{TegAym&x~`%5Y5 z7kFXRL3WOTOwN2tRe(4?$2{IR#w*I&UwFPOSZ?v8JdFJ21ro;A zjEAA&kYbx@;vm23+1xMjQBM@T@?Q|Fap|Y2B;IhjmHQ&LN$w;X)AlzhS zA;am_$&ldWE;tOj3{)88n!T|`|MJNt+S;j$hWCxTQq`HWe)WT(c#b5lL!s`} zvmdOXLslcc3ofbq>;lfS3s-gi*x%lU4&OSrZm2)z{Gdu_K=-6L{JjYQDV7ODC2*VZ ziDDJk+dPvh?zge`--nYX0M3M@$_7E;clXu)-1@1+xwsGl$ojOg?ILT&kP~i83zwE- zUmgr**Nq}4YWgN!4!+oco`#9Jz*;=jezDV_~QVXx$DX2gIak2&jr;|P z&-U^4M6g^-@h?Px3NN-w1y7y0`pts9@H7v;E1yV(A*;$mVdI$0MS z>Jt3;B&}EHxvB~~rCVJnm*-m<=;ye}W1G22wv8KF)1tr=M$i!(Il)@(5Vm8Kd+EbK z&(M~gjH{AHLn&JBW#0hWY$4_DdY?1eLv~BfibXM8b2=#(5&Q-Jf_#+CrH~xkZ2d)z zlRPn#&V4DC%6xPzEnL`^a~KWT%%GUYs+dcFeNIzXDq9!fRr!8M!fEJ94{R5*dGb)$ zTDW8#fc8^Py=+vypzz{&Ap$yEQrwMaa7fA3v-%0?X2KR^b$-<0oxu`kty;k9xv-%d ziHlFUe>^W#{La;>?a#R>#_n2k;b#}N0d2#gMWO5Q+F@trAWZk}{#7Z>scq@6KAuGd0ahn19hL2hFN7FixmV15o7c^qbo=Lq9P)R;u z2Orssdg(aw{h0Q0`fQ6s9*(3mlG{*aYPcS@(l-#RXye$sCB%QyhEx^NsP|&l%>cdN zCE_hU`q!n&^A|F7NvnrM<)169c?MwUm!C`VDkpx%cuND@=A<5`_qFR(-=#G<8Nr_- zjYAs-f(jP`Fj4TVSITaqISvnZdNme4{*RVm?n>qcRX6YA4uzM6x?JO>fERJF!J^zX z3~|^a$fna#L&<7Lyvb*UY(N-_YI8%7+B!_c@7w$+Lo20!{8jaWeI$@Q;aD|s*No|y z3&iHjk(t9RnS4);zv4vZfQ0K0X((c>kGddlo7|>3W{jUMx+wUm*G=ey!l?)?k=Z>m z?f|{v=n*k*OC1uQZgtl)Mclcs|2;W9iRN&Mr3cx!;E(wIjmD!cz(B+~5qQVz`e_x^vh_g*EYYBxT#bA-*GN^Bevq4JWyvzx3gYCgknnDs13i9G5B03O zhd{ksOKe-t)v{2)(ws-9gFSgbF%2*(IO^rqy$|Iu#2AObhgjksZKP!DD{&-mC+p>- zb!#Dh#xwWpEd_?V00~_V*|cImaZ?P%gDJGfayyC~jxwBT==DR(VT)z`Bl;fV0+%nK z*b4qr1-k5N%~15h_8y#6u!4_B%Z<`i9b%=$Ym}qXuhf`g^2A4~it}Ut2^qoIyHE?w zSjsp9rg!qzvSzBP>Xo7s%Sm0=Mz0jBmrF(tEatel1JDux{C!LXe+dhelQ; zm*SBZIq37EIxfVhv&uLTQe@uGRZfe6M7G!2IoNC&MW|f{(|TPOOG!7b7fe`xChMhD zFaL{-Wzg<^0_yNu_3}I|^Gwi{Il?QJ6>(M_j6i?F(2vF41x=kkwo$T`<<+^+kcLw< zj=gF@*c1c1)S&(g$=+5W?CTfROT!wj71V{`qX(w5IuHu&lP8@nX?lZ_6F)bf+pgc) zvdDp<_{|;Ig~Nhw36F(2ueImn_ep}{(yIxDt;Tx1l%3YVYqf1q$my3v=J@t#=4Ip{ 
zu4w}d7w$YjY~i&kz6%;g?cv8-em5)6WNqEhX@GZ&|Cmuz{eizRUHefHddE-McHp}P z;<7!7!a2FCW%R61&8i~gG~6Gw$9?Fs=g|f35X8=&9#P{mHwYQGX#42r1LxrvHlc4x zf2@zN<>eYDU22tSk#t?%Pe}j6fetwNh8DlCYT@rr&=N_J*&=t6o163Ka560c?_IJT z0*LG_>(>`i@nbge%;QTwQ0^5Ox~6c%r%oWocBdvUAAVwiuV>iC+DMxY zV;kc<%!s&nW@o#(D!xFK9z=T*htVq(CypbDDBU%mVYKVd8KoAGGMpVM-5fj$VR_!7s=dIHF8lpR@8 z9h;TNYQ_j#srEk&31^nbQ95ty0@=m6fVy%I5P@bwh35+<-gn2^AEM}0q}7mV>+xyFNtmYL{8(fb`Cic$nJ&4+PMWRQTpHk$k^Qj z*Yw+Lz|T?b0&P(_k8s%~jXmURKS8&I&vzq(4eB+_H{?z2vV5CfH%bJ*M@tJqHr>M1 ztLh2@HF-9D20!hYyw(Hxgg1a-a&79N^Ve79BtSl=VYDrcpTy4qxD*ezqNo+WOYlnI zDu#Zq0|L95Fsm#}-JJ#Uq-D zNj}5sOdm4augWyF+FU4^BA9=uyG^Cs8U>8wzUWixOjcbN{*!!kbTk&Z5qet}D9kl) zIHsPM^0)A@?7IhGcDV08@M13kQFbVA@X=lpzN3bMY^%C{+jt3{>P{HJMjg8DQ|LT=R$LX4<)Cq>;WuH*i zpV9(kJyUc%7cCtvLvv44L44@h3zH*xL{P>3pF7!lM{7kufwIP_QC{1GLGTQT@QPFk zt?G@^t9|H&wU7zWJj%@{Wq;r#pYW>KI3#y}Yjf`HnWstKh5lXlc4rR$;Ipt2JYsjp{EUDEis$V)Lcc!S+Jd6b%yZ|*oCTH`%*3;_LhI&UGxMzKCSjCP^LLW7i z9$s0!zpMcRzK*1^9+SP$%7W5M9axKeiZcLCgv3I?aOJHm5EZt8k59NXY_h0Ai$N`PYlEAeZ%|Xvc**OLM0Z;fi4`W zFI4h2CSG7oI(pGWBc)*&{!ScTDE8TI>-SY{;krf;$J8gBd<;=oaz2M|YK33rEg_AO z6IXKI`n66d$DwmgTN(YqFVkKVnTK!oL+SU`J0(W#J6k$%z4RI(~xup8yg8!B0Zv_H=;(;8mT@3~NW zDw4a+z)J5ayfKMCB({}XS**R1^yIcYzFMZU zGb5JI4!C+v+Q4skk%7qtQ7I{E9|s1S2(ImhmCr%X>|)mEYr%C9%HO@zI4#KA`kM;&Y%)X61x z6yV2v=-AOGlxCXK!$<+}5kZTv?$x8Fa0()0G&@REYVu1uuA@QqF%H>*csO~-Q0Q6e z!(D3M7O5!ja@gC+tbH$F>y9)n^ovg3rj_Qj&!fDyFq>u(Y5AD6ByDQRpY%ryEBXq; zigkYu4ET-6w4L`ntdxU-kp*866j|V&$eKxoy1e=|yrEkw3#&SHy6@D(8h#;j+|2ZD zmJfVXyHyXJp(?r`F)0LgJ0jk?`vef;-;6NXes>yD+~WQY0@DD}FiD|qJKvYS@WJpt zo1W?+Cy^`zi%UhZAomi(rUmY>Eis+^=k+N+HN=!yvb)ywo;A1Uqams6Zy9Pq>?;*_ zotr;n6E4r?Gd|Bpu>dkj2}!lJtn^3J$B-)df)%Wh0^cUfkL!qFM%_*Bld6KGL_=Lh zy0yh#dmM7mRRms=>4CIM6YDX%u1A^t9l14dAT9O%<&8(Swf>Tfjy(J@kxqBl8i7b_kr} zNt0-fg9=j9{zF?tz;T*4#50t9GbNhwW3oQ{ zyLDhoBn(z_T1PU*g?`7{n#|Y zam}VcxraXvJT2G~lOVZ-PHT)gNZ`z_h}|w{4AH`zea^KCqsby~Owj)Kz9)f7j^rM< zk*Hva4oez^UY0?#IfNi#W2-b+9iT=24RgDo=sR-M6ddMjy2*3JJfLO(8S=C4XivjW 
zklYP$#aKcC9sps91tK}5uG)esTd7jMd*Mllu-1fKqS?pO&FzT+&!&@$(Ag>9Sl5-n z<&gSd9@}z9!?P*&g4~H5E8QvO#TDbxnr{vfM-DBQ=3nlbqSU;oD{uV*D#~D{P!dPu z{sZ$LHfJ>v{`&4jnXl2A&09~fC#QGjfbIsGN57hHj7Lf?J!}GK>nSADC^Wy^suF)F z)+LNj-HbzLmw#a+ZFwD(nCzR+19!Ccmza)MMho-Dn;QB-qORy*!R4-v96u9NNsYMZ z*Ij>}ZaJeGBM&aKC~k%VIvqg}7=1riqHg=kO}1|=%`(a`*;QAB$Ac>`B^&6uI`%&D)q>!dj0`0~cB10U;h9#u*;ykHwdN+I)XTo5=Fj zUeDRO>w_rx0o*P$Z<5oF`Cw(`(>yaLj>~Wvnw!?h-?D3Ag(da^hk`p6!Th!%=LFoF zy~7N(V!4f%XMCXNm)}uL@Bmkyjq@-C(DK|Oln? zNQkIS4efgm2jjlqbBb~zUF%&lvW;6LgeE3v-{V9cZ9zjMfIG8dMIy^!jF1HX(zB1Y z#&z>F+$>Wi@CCq%qi~|Bb9Gt@ZE_R^Z+xyiKoIBg>ttuNJ>}4;y34qtAbU$Su>iU4 zgo|)1WFa|&XQ6UI5e8Ug&Dq&ZW@^)lgLZ!cKhi_8Hm&I&ST*Xz8$CATPW7!vkkxx@ zwsYnPKm24zM2St&Mesz`$ZxaQqBuQfn0QzFU5>;j0k+%22tougf|A|^OsGX3LvE$E zE`Rb-kQ{>b^w0OfxknIjS3PE`#LoeZqw>UAONotS8@&|&V1b};uZXD_( z*flUlLFw@D;4vqr;@vDQ(pt~_E*)XR)C?^LFJmz9c6@A%n2)B|-;^7M@q-d<=fewZ z14V%fj?s>K*Yx~Zk+|S*O?QV5v+lZ{@gwTvg+9clz)1x**rF94v&~1<2v({639$WX zHT$KnR|N>m%fY0KX#Z-11-E0Xbe~Q|@lVP(J({sYa55BO4h4Z; zR)z6Q3h@NzJs#32DH+|XG^$bSsJ0!*W)|==sPh__f@E`zzX-&Ra zmM1T7o~8{!1iUe>bAX@JZ}iIv=4Vrt8h$%e>~D2jZDzaOKoCXm^J2{#mZzgbj|C)d zsVB4Z31rt7HR<$$J4`+;{V~s2l()S&^fnGR(2ezOIX#JA{YIa;cwcjH*J(p~vNfosSwgEC&F_&VG-S$Wih@s19C zgnIGh7(bZsVe#}E)gpJAVIxF+voK;d05{nf;~Tw^d7mV$Ym%1v?qp7+eWJ%etcFO|J3$ihp0k<3)>pugo8<%K>53%hk&MHpqY2q#bvn z{PLJqT#x=pHupR!p8ewqIZ0xWzJM2I&YL0Ohxfr-;I#U#r#|b&M5|5k8Dw<}AJQja z;$K}SXa8`Fo`^++pZezRHH&#+oKfEpZP$~X(3i%~5)*`>k&7P~qx}*a9hG-uHTDUu zA)Qd^D$=SSc{3M(Mrfwi82L7PJg)xQ$L5ZtX#J#l6cBi?M%Zoxrb7)X+k(k#4Q5 zV!!S*axj;YNjQT-#bR+XN*R5=Vf?FBi+ELgS~Q1ZG?*GhG8g#E19!Fa_~)Mem(csi zLTbP$_U4iYrtdh*TwufZ;)^ieq=ErZqTFVm{i@+zIi^A+ymDwTmX&^7$;z zyt#5K6B%45*rL7SX`sz6+{l#>KBCunc}a1YE( zkFWE7eX)t}NMa-2CCV99-?N_==+;h}8@X6%mg9$>q`yebb5z&Q9LVWY2>@}fC0TKX z>3~LW^6fX*8m7>4CmD@}XBmyEdb${X`eVFb_ff4W!=%t~nHe7h%$XwQbyqcN%E7HY zo*Kg2&|TqJIxm~i(jcWYPoq2!4E_AanboNR1#I5hyDz^!p(STrSEr1bKwjnYDkSIS zKT}b@3~pVTbB~In<%V4lKropmq*GumpiP#eF`T(?^P%TvG$J^W-{sF1(OlsO5R(dl 
zePh*vJRheR&uk={%1EE86Z0y2P+m2A5VshfM(1NZ`xyN$fPBZ)9dS|38=^VQNx2>Sx24|zCB0~|Mtv7?zOe#Pcn&> z@Bq*KU>uob1p}hvecmRSm5wIahKe%T7k1wIjSl+L9p*}j1pA^vLUl4zFJ0(xrYm;o*{A2HXxMbZW% zd2E$_oW%gABsrs-=hq?$(A$!XXv`g`xxIXVCPL~X!u2Km$Cm3^1$6LVD5Wd*1HYt*0`3V**cfx)F)O6J#z-&*#z1m96gW9@uaBNEl9Es_hbN8_qqk9hhT9NIenaS>j_5&N?o?5AcNL z8|z-3C+{I#iY(!Q;T|9Md|x&Y7lOtJb&`KJ|JLFU=Ga;~VAhY=qKo+EZ@(k8&P%$Q zt(x6%snSo`{=tMPcV3tqwW44OuN&@Hy2^v!KZ6R2*xB8$t{Sjv|;!d9rLU zCuz%;SF}t!w@4*tkbaCS>%50ZFdwiF}FH{Ap;opybw>|?27gofif6)`l`gQ6t6s#62 ztG_b4(9P2!0^QvdRQL!Wn~D!~L;d75K%gdZ@RjQ0DHf#^IG z7!ls;+w>-&`yoAf)+U42I`;3&l@t;l%w;7vsQDI1rM|!+Ptf+tu;M;u+usW%=D<~0stHL5tJh4Jgm9A= zjc}kpk?@@tq_|5T9`5muXQ(%~Y5|b&2O}k!q{c&yd!4s*1n17Ag;{o`ZS}VF(PH)r zJ2PckAv!|*e8;J+C>zKLev&PBXK!I6)1mT7CQu--WLin~k(oE>_$JzaN6&U=ISwVQ zER*xQ5S@1l$o9Nx;4bIMCAox=NCmw!39qECyjDpF3Ybetr%>KFAxKCU(4k9m2&Zl6 zy@mEz72!sq?9Q=KhkbNM3aoTpO}F5owQ5ApVvc~Vf~mqzTP*xLFTfKSW})Us-yDnh zexHL2*v-Iz$5h|$O~9Zwc`b1rUPQ3OWQ;wx%_G1lyr2jguJRWK5>*mO(KQo5{bj45 zFs+@BjU~=>*9_`e?|_&a&qoBZVdy#-->}TlHK>N;gGW zRSQ;=>*;E)^Wmn@_n*lTQil!nJ*?#)PNa_bMRybH3M|G2TGxAxoW^FU6@XxQ5VI=) zISDR#QKO6&o!5XnHs2*Qr|%l_Ma(Z}Lb*SAl^xMSzGrI3NNIB|_02$iBe=GR4@*C9 zw#ZxS%jkgjW(Qp_f-P~`^}@mx^MUB;A_v6$1vgIWN)soOp$&=sk|Q~&V5Q$)qe&h% zA|OOk&mTGK{;1!@gw&+!U^dM5DhP-fZ*X_mv8XORp>~j|GBiLcqo@1V;9_7qcvB8O z9Jx`4fEwNfcV#ocFG9$wRJ>?F3;uJN8%DO;21z{(lW9h4efHbKAy}w>qeV|lwSyXwYKwC-tk(;cUI1KV7XGBG56au zIv?3BuEUR6MUDfT-%z?`B5x$?X_N6Cads1Z@KYTf=)3fO7gE)>FFt2xYgKHJ)`b2t zXHB=fAwkzOKC5;^uHD8U0P;I*ELBV%qiA#=u&h9s8 zZpHIo{$jI^_;n3${(r;-BbuDQWyJ51e?;273oNp@fIj&;jEDl058vwW=O7>CI`y;r z7P>N0gT@sDgi(OL+j~qH_meBf&k9XLHRy%JQcgAH3zh(mU-pmXQq$?YxPOJJG7)fm zln@nh`Rl}ZD8!E+H8{Pe)i*h(vz}RdX8U!n2>t4dfEbz?ML9xB>9iVRLI{5>{_>fByQx?R)^r zJ+b7jan970MM6LTVuV2z4X%9PXTK@YwhO`|E`$k<%hJ18i3*zaApt$pXQ1e8A+DTc z*UGf|-YD`5AQbA|^cwAPcZ=xG!J8HI$+`%xF{9e2WFHhAv`L8M9CVn6zdm3Ecg#)g z+@0%k11<;D{E0j%PLKx!5P2n7;>gsjs_Zv;uw-=OsK68>Y62Av-Z~ThMLL-L*9^zA z;rv9dmRLN(i>H0>5r0!dJ$P65h?z9x!Tf;&{hrgWQqzL?d&OhKW!_=Kx9;?WmhJX& 
zS+Qyyk-snqJcTdtG>FY4B+6;^o3=iOk4*zzC1S+KswMl8@^h(12|)v{bJIpF%CZ$< zVj#%?4es&%W1@qjD#@w|Yq;I&2+@HrZVGBU{o7z?y4}^9cWH^Xu$15c`$@)Dq(FA` zH#P^>Uvg_u?AEAxp*2WxCCcudJ1pl9Ph5lsJGn78{YmP zmAaIrIIlJ2q#vY#ywA}r&0*Eb_AdlaiN#7D$!elg(k(sGqQsICuGZ~gH7qPs(&B8Lm8p|85!$}q}M zpH7=rF1;$AVpHk#pt99mSvO*-1Y~!bbevO$1hu6Va=s%zoH=he{ju|k+$Xd{9Q3=; zw?}xP#rx-*U>|ZhJB4`vf$3B~ay~1Cm)%sHMN#REs>PkdsH1SZaJh)kBL4ARn|Qh; z1`OH#!VL0zeOCmCoRA= z{ktQGcQp8}aL2IyjaBh6!V1pF@i5T%7u?||FqOIv;r)-3*qYwG8=!|_LUU*Kj^8(+ z9Kt&%b4KR9heSWI9r(SVpow2lJ7`ozhvCore1r4WpPOdD2;mmmU(@alC(^ zvogIA(*Sn1UL_Pqxc@@H)*ZAhT|tS2o}I$=>OlfzF+)8#$It!SM%wR~%t@)jPkx!+4kh~{Nj2)l!-6XK zTHL%^9x1GuoA83=fVT+@6?W{Pij~n%GS$gJscJ2wcNvM|>)-x!DT&n^)GdNL>|^s9 ziOElSxPc!7Sa_7c1#jAGRC_oN9HN+icNTdBIo0o42sfrrVqE4uO)Rt^T~|EFo!Uf% zg*^KaJjO)?)>*L$CQD;M(S{oqPgio`YdLF>L~PVbBh)`R-1n+(s9+-6W79l6vuKa> zOCt``jQRHPV(RoM9`Y3N+E)K39#;P^*WI(>rXl#O!Y5ZUVz=`#S$59zy*}Osfj#z zGwUEl0)-FG$A2jq$Xxxy2K@Ux|2h8rxuIiU(N)1Sn+JjMbY4Rp;tP@r!C%S5Cn(Sj zTFm*)QO{|h%28;u27*-HNL^ZEqv9%|5`r^ApfEupV2c_@w7h2k-hLt3_wJ(Uo(vir zLu92yV%vrcLA*tBM#8qV>LOr!=>#F^6mY4W7j!+%h_)@_Qe^(+V)vyc|BZE^_>zHY z6kEI4Gu!s*C|6c*s#4?^|KiQTPv}cP)>CSmb0PxstYLP~pIdZQ9!17*;6}ZIDN_f) zU^gNCr&IG_x3MOy2O--v3r)!H9v_}OuKB+(~2 zgj;51xSG{)Tp0K7uEjDyh`0q`Dxa@P@;8Q!yL-hFHP5?WC+HKNWac2aGgCS%a2IrT z2ngD?UwW)vY^09~7#4lG{3JIe&{~?+@O#vV#g)2hONX2OS}b28Vt_La z#y~-E=gpK2kc}!%GK5)J#k8IZ?z17sFe3M}ltxxz2WhwZ2M&R)I`{2PlXT<&Unq{D zlm}{mtv3iPCrIKOrg@bgsql|E_y5c=JqCFVbRzp87&Rewp#0CTi+J!5)-$MiUe+1J z@8nd?Rw=s8GpAtBk@hzEHT38EiwH!WE6aM)pW$C(`%iWAXj#+7N2BqyoxOZcm`*4v z5XFjE09uu#M0C4Aqz&``iEe(4lC`Nopn_{h!{ipEY(y=!Z`@3fYRMvHk8+m@x`@`O zhdZAl+7;^d%ZkUk@Y5|>EsjCJf!Zrk>jmue_4QRh_+P#?I*X{I91Cx{^=*JKki*j~bmm`H=zkwT?G33L$3dNIRPgkA z!!EOA0}x)4fdAUe%;I_r_xyn<;?YUs$UPMhrMVH_Q}2r>Pi5Ud6|@aM+%Z}yX?}V0 z=S}GQcfSw|8g>!*J@XDoJnw!_T>l)}M-RKnN8kQo5x!-wQqu2S?vjy8XhrR+whs^` zyl|#Tn8w}be)=*Z=~r?DROv1lVXjhh>@W6`V!Q0)Z~!Aew?(X)x;Rqp;nBk#n?Uuo zcJ^=5Km&)TUgug30M~GfsOi%$JpT)! 
z|J)$S0lz2<=KG!!s*x(<~Hk=@l zBy4=SAK+gIIM8^CRJv(R{WsSag&6~0{qCxZb1QfG_f-FTRB60WtN4Y*N1p$^hQ$~6 zCy3c3#Qy{L$z7G|EiWcQp?_K}qH!VeAZ@#rO?gfpka`F{kiCu{trt_*O+F09{%aN{ zdIzxr6Z014692#DLCYTF*!H2R9462={z-=OZ<7qcpzTz2a2($I{pYGu=H8>%0N%(W zyl6dP&1;SpzjWB!+_JgHO^G3~3AAB*KyE_NPr^>dPPj>ay1G*R-|(rTeb(Yfb-XRU(|IRvs)bDQV zYGq}fG1*G{&_Hza;c93WQFM3Q(OwQ7UW*LuvNps2HfQ)8uzB3M7K)<$4~Gl5M@eRL z0x-hpZ((Uqn}}G2GUOtHMXk2T!_4%p|Gh%#YU*u*i3|khe+ER|@{~PxUjBONG`_j2 zW+F~@J>vj{3BuU3h+A2!+5h~@jhR*!b||;$g}>@FPzw@Nf`ToW`7A9GN-UmoCs z2E~#;_nfRO-$qNno(pz|>?4v+RWYqU&+2-{;&w7QFc|Mg8qdcC&n-ivzbYNlOIv|E^DL|Lh}t|3W7N5O})! KxvX^<@*r#%qcg5mQ*A)QJ;(>bmpHmZVgXd>ZesJ@H)pzo(6z> zAp6;A8ZNOQvTl7CvV@`w=WiO@b*N9;6O2Zg8^y1>nhsH-VnzrDd02G@Gbpi}h>H>xH4yWB5xxwmO+ z9@5`H$?j-o0s(CX>o~476If!y%PwY*`yJas`5h zUYGyT=+<`xd<#L6@t2Ff>Kh_hv%!P;dio<>HKuhe@Zei16po-dHmcbxJvJ{PWUg9s@U76N@s z_ExVGXg^`&{6(Ej>z{-*hTQ_f%uw;J<|;&Q3m1AjEjM*zW39ILd;XFXtJ$7*o;RMA z-U0Oo<=FbzRF8hYIX+!!Q!n6}^-e;A4d*KJsYKZrnsbqv3v4V}dA<)%H1$bWV;!Zm zP_X!fqQDsO* zY1M-IwYA&b*WRbUzH3szC4avJK}sv1X|FO3ia$o?)C5j{3rRK_;%o7acvm!8$5RTb zydh2TjcLxOXxvS%bzI(f63t}VWBmhvvCAhM{&JsMoPviOmfL%VgecOQ%26C|0=x8dG*oj}e6S)DIty;+~; z%S4QYpn~^WW`9@o?_!+vq;kVQXuNfM)N?= zK=>;5m-NZjZk3|WyivQ6df<{EDB9~&0^T~+^Vw3m^Hh*&lDTZb=tT}g!{kdysx|3A z!ld4_8Uo6I7DF+WwqzZbN@4h6*c3$aSDGL7va;O~~>N~)CKe0XmvqatqT;g@&k8~l(0nw4r94uv< zb~51G-max|)(a(-hE?TUGoM2`yP)g+c1b-lbeul{t_guWHDo<6pAWjsoW4;86(ekV zo>f!d(;iSJUpDdWDVg?+c8~TQC0pGk!#wh7ja!_$5L9OCuj1ley)M3#niPi7^p)$EN3 zf20GWJ~J3;`#_N)%^HZ;3uf~Dqn-3(Xn?Q`?(`mdnQ*N+UaUV zO|D1r0*Ny#L*BLvq@cO}?%;-LcKCaV1l!wA@X(NQ)$n^l6esCUyzl9UD}3qFO*x@u zZQ)VnEivQd%ba@mkKg(yDa$~fdu4g6&B0>@DlNe?Eo`!mZ5 zD%aN5&SrI|7p}PCGBU=lR8Y7!Rik0E)7E6BH6g6iKi$8*i#yjfin#vFEbv3)jczzZ zPv4IaIf~CZaNs~{Mh0?fgb~F8d!FDH6&1w-otNxHy4Ei~BI?nYM40ndEt(nJ5nF|< z&~abjQam2-ge+{!qTFWvDQtox&ht(1k5qHnJbhb^DYMB8#n<-xXQr$hM!hS7Fjy&S z$|y>Q-wsMpI0Q)%lndXjL#j777yE<0_S(;4Gdyk!tof#yTjayeF;kq+*SY@TCvhGT zNyDN?8jewiI*aP3T05+Umr@1#(ggZ4XWF*4*A=ecp_NgD@w0p{h-*wDZ%o3c4#h?J 
zVvhwV!D>h((jm1%?NFSZIuglC&Gaw04HVWTmUdP7`0?ZA#l^+h4ZV1~mC~fhOTJ>N zuaDxBOvTN7($v_g3OFt~KdhM_ZOoq&kIR%jww}+Eu^!QITxqA>b02Gc3YNaHa^bzA z2g8=N4{S|VLqlb?t0^KoY)RFZKS>x$%moo2Gz;W2-^TGE{pBZC9yUG?9( zC9JIn%?(HPQ%UPr7&FgAGP*A{rv>y=tLe0airGK2YIrRPr|0}Ve`$urq=9Qwo}W=4 z<+jrLaGv_RAb7s%ION&J{pjvqO`}^}6YOY?r`f*Gw&AB4(Y!=Im=b$G;_L)9T zaMP24Uv{SH!fW83G1Y)X$|;~wJlBL`h+OY>Mb5jK)Vh86jo{uKFD)*6`uYc?xj5)W z1N5Q};s^r8~qv|*=2jkCKfuns}uLZaA^AUUDk*tDRdaB1aVP=7%> zv%u9BZL1X!PpJ%MjN*WZMR1dP`oeSm1~XEr570Xlr|bh&xl-#}Dq*NsXCG92Y(3o;0L6*cwirr3;T;J+J#D$1*=bXpenZm=9YM;lR}qUUwmvQJz0CYT*XBeeqV zD}N|>JuY@vv~;Ilkhk}8M*2n^H9ct2-~#OhRX?y9OI}#ey(3y7F%mT^jbQzyQW$oX zdHcq=XAlu@XJ79>eb~BDJ=L>#{`D&*)<}ZR(p4&tOk~gGib*@Ev*U9c3WP-(af#ctdNYFsB}uIJMhD77x_+-6!G?Z_HO z7F=FQ9HHz5;JtVdDS(W%+3Tjv{+qq&R&n@+!+-I6gc371S2ntEZjC}99Kr3&jBraC zxJ|pq{Ho&2NdE-64CdWY3g&)VVB7(dl|pAl!qqITJDnvnBVkMP_Vw&;TI)xQ=hszc z8D5J&SeQ>8#gpH_ZIsXLM_5w6&Dthm?QgX;E4r_h2h1$eI)$D>*2B_Oqi;L<>@@;n z4Z<1~T}7qAZtu4`k+HDC)*5&W7qu)Ap~l{Y+q5Lc>tVQ)HusvH@mP)hUSTpiNn?#P z|Ke&c2*J^D)UPSu8TF1Lf4z?n3U&$XbdZJ14h0*t!^hsLK@R)t$V6p231+@g=<4E! zY{>J+O+`Zetrv(gr6D|7G*ZLi0QkkC#I}|d(7td~OnUr2@BkVCz>_+J57=`o;J~X) zS9KR2D)9F}6tlM?ZYNZbb&2^x!8xkzUbvlAeF5rf*McG!jNsrAN_$u#RuFy3}%*aul;vd}7)r9n}fazyX>%~y;s7-T)5L|}L>{*sj;eVU)o z6!g=NBzUtP7>Y^5{EoA#3&T1-skMor6Y?7hb;6YmgrHe>n3?n1F|k=?)UJCm8{uj? 
zW^HReU+8Ux`U!MlaTKI1*1S*YK$HhP>HGUHagu_P7G)q!63fIg+;5Gw2V4d9hit*X z;z!D+T~>AcWCOIyTt~wA0fxPfBfRpQqD)EikoXEZ_$nA}k{6$>7W^}lC|bko)M)1i z9Kz&ZMlxd;BfZnq=iJ`w;>Y{e#=SM{l^LXh$jE?cRp@%MzSW4?y!JJ(8Rj?q-W%^c z*|xh2Zkrl_JbkLn#PT}}RyvY7*$TLIa#fg1po3Qy3}!k_1M@5s_i3gWCZ_0?hIs{A z2Kq8T?$73%!OLJLEVWT=B~!?6?t)vF`#`tNQBjZ;m^(DATQDbX_I4JYHBN^Gv7%pf zd$uWQmpC!BW_+zkrvR}5>+pmFRa{P7D=qoZtvD|fJx`N0CtpIql{KL$q%yp?icGM; z4>}yttxdI_nqQjT?-cj=%Dh!+<<$lg#O$zPc*C;8b1zR;#>~ATBda>;%DY7S`W?Ei)$<`6iqI<7#Xx+(P@C& zw=%~^h3q@-_RL;dh0-1Xsv0AM*l&e;`}gJ)cqM;I*J@w{kLp_$X}U?+ueI)}9b4?J zO25q9phga9G)o81-Ahk^yTZrd;7^axcD}=Z9Ze5mX(6dNay7=^jPo20=e!g@(3Eu) zxu)2MT)&i{Ge>qE3z=d?%xYw?Uwp09Aq+{jms2}4?r&kwo0kO08ESN4HqhEAFY$(E zS?+q*e&AaKuyJ+Wi=14r&J2Q`DW{Bfiyy9nmXR>Qk)VzzB&H16MzTN6pBCt)puz1m z0w&QFA*i9ZeD;Aqf!8qlv!I_ubu9sR0sQ6pDm2oxK4jtIpkVuhZI#rlJla66@kk*# z5{~rgi$bdRg(*h~x=AUa4$K92`@n(NrF#7OMdlFYn#K-b7{$u)ao|MC*4w`Y-HZBJ zRunsVS$#arAFA<5kSG8F{AT`P-axQ!;C*FwnRr5$OFQQpa~Fk;XAl~Sn@D_bHAtJq z+fwKfWhO=G7W4T{ZSB&RI8GFNC1vPcMyRGOW^0{rW9`3eg7J%X5}#Y#jAHOJ11D|NbZN z@3akE8nQuDz%gZ8Nt7Hu)30Us|K$i@@uZ9kdELD&9ekhwejro zs-j0yA$BC{d$=enJM43@0J(wwMB9AOW0ssW34fKmsD?0f;FQ(f9gt={&$d1@%$m=> zm=x$+k6f+~k@f?zp_tp5TDsC5Xd(oXal@Pf;3EtA0 zn^Y9km>RbDX&#MFP*$(C#RK_~et!Yl3aF=+T@Bf99D;GFN zdl5UK#(;N4Mlw}=%L481_!vhs14$v-enjm$|9a4(E0*U0d5~njtF0Z8Rg)iPgw4p^ z0Uf6bQ@YGxn@Fb=5`uywbeR!>s`i2#3evMB$Ly^~^nkvj6^m$Cv1kc+A=5z68~FL= zZJI$$v)_q=_@eYHoL7fjfkwz=8$B0o0y(r}!M-CoGAi&%O~Lw$k07%G@of$-Q|~mt zm%o;$Z{4P0DWM{TX&M&=@tIjZf)I2^v(Vym0j-9~-wYswiQ8J(JJ-(!wtt#up~(oqg9uu*@}#6# z4Gho34oFjORK)CnJilK83(<_u$WUV=C}UoJX}+F<0NeXw92m>cK|pl=_U_(JKyf+N z-xPFm{%@c3M>}t zPox)g5}FJ|Ha|lBe@qd?fQCrLIz?f2OkI!1Uqk(N4J9p2F(7Sb-=Ss4u#gucqxbEN(UR)idO@03da0|!B@w9l_^uKwWrs3^DD*2h*=E$>6G-ZkxMc1{xp>Dm#y_vt! 
zDsM$n?9Ibn+Y!||7nMzA0S7rvs|WaRm-&&yz^^G(Wre-RRG91rRI9@sU|B}JGU{7i z!fh*n!o>$JGpZDI<$hBhC=;(0Uedo_V~EsjgO)B!77vfqFEvrEd*C78=a>dgCO*C)|y#yivc zMi&Rz=y}d{Y2R0LVWk#-ibkJQ_&a5-d6F+TkCVk3tMrhk;%lZ7P0~->T|2eDJYTEN zdL%X$d79dA$fB6CY?u#9gr>1N86f@ge8-JQs-x56+JR6S^HQoUi^~g-_&Q1)Y7SAk=Leh zK_j2nYJ<$v0@p+PWFwcmm5$LR#(bAIh7o86^5g0_Lz~rY4|C3Snlab9hC?P^gbZhy zgu)x>3kc?WT`{VZ?$lHhQ$#6l2ZVcGKKn@h@+QpNef~cLhKj#S;}S4?X~DRCdmD1X2Kdw=RGDmOY{_1($*tCEVm{e>f#h*2IE2yQ zAMxr2LN@^(u@CIw$V9Mwo27NhnA6R4(o`No*GK2m^pW_n;6;uJiLd3svf2K~UAxA+ zIq~{4>6Fv5kf(13029+R%vP+quDa!Ewk)FYY=9YanU<#@Iiy1EOWn5-QLN(==Pv}E zUm^i?>c5Wjz_q!w!wuyvv(J(5pS=Ye{R!>Cm0tZ?K4_rh+Hwoh8n*Gn#Z6soTbrj- zaAyd+gwm%x%9}XkDg;^Ni=pl9)2wYzItFFn)f1m4k8Cc$EHwUgE+*oI<7 zc}ORw zW(vdGI2rTjUueu(w>^c`490sIr~Du40Z&{Wojaahouy~cK8 z!T-oFHAM#ogfm}DsEeN%9evm%0{}vG$@;6{s(?*=p7?Qk=9kzr|jxZFJWkT8pfc!x-$9MEwbuINP($V|U>yFkg+XRMdEA+AfW1Ry$|L;G(7XGolx;FfR`qPz~$}3-IOB)5^Yt28gV@^yv-hz zEd(8Nmnb8fwqajDFNoKCP- z(Z+LAuN}3)3hL+8gc-jBI6OlVpHZDumf&-_-`}JyNeEFMb{+diMy>fnkXclw170Ml zz*ZaPYvoZjUT5}6vsmF2{`S>Rf-Z;BwxH|w02NG}vE~z0 zHcOpQZ@=|{1j}@5&f8B56Ev0HlYlJOSuYINX05&+x=kwsIVQTk7$glL6^zdS6E)`e z0*cB*kmZ0x;TupOK7pQrvS#zQjZdNm+GHYs8oiW3v{y^ z`({|zK3Ijix4~`U^uyd!io1mY{Gx+O0h)*niu(~(4Y+Uo0n`c2tHh*lr39IY@0Q-t zA2fv*Hy=238x~!i9@aOQEH6HEiZ(mxWx)QoVz!du-7enK+bR+K zy?$`?eouP%>{?ZGVVs+x$jyt~LfetBE8FmO_80^L@29V8+SxZSU4jrPuJSS3o zk~;>9Zsxv&nC2?8DgmnASyXr+u|T=L6Kn^-)oIyz3ebsQ2+3D~2ltx-oIZ(PM#$=( z&IJGZ>%&=5thbitLm_(Ul5uCn=SFH-w*ZTt(|s*~4!#?5&nUZn8@}CrM%>nv;=}3n zssEss5ZYcSY-r^b-ZZ|j7S5uNAOr4GPRS&g5C^M##&&@(!u$9I1W>s0ICTi&K!_|f zvxPa#aq;@;GX~92WhIF=bXT;bMK4RIsK$5t@UYg{LVXu)+A-mfPSc(1FS^A%hpX|{ zjJlHB3x3i^hT!H0wehq!BZlY-UL#_}mu1d2r+yw`Wh=M_#8Y%SZSCP zj^yH1r0bTA)JG@Z&|)aU1eNyW$V-8j1Ak%S+V<$rJ;+@ychz@mnyI{dBv_X6p!NVc zk;<5f506lWDSc7OWAAU=Uc0f^Yq#Ot;JfsZW>YXE+(54$vIF3t&A5u$q-Aq5U^me3 zuG|x$5tPFqKAXGPgTnrYo4tv4RDre&B>xfri+_$io8_9&Jf0z=)9h|-VNpGH?UdEQ zgl3b2I&JQf3C+5ofMGW}aeKnL36v>DCRpd;$jxdFkI50y`yd4Kg7jomb{}+AmO_GB z87TL_eQ$p@ALzv228&}K=i2UlYV+#F9ycE!@ 
z8j?(IL#{o@bS(ZRiZ)hPaK!}vJont|LoiIAha-8-@JhJOREYhOSyKIMtNwC)8zJn~ zz|r96JUDQr@h{EzKF*L)>KJmZm*GqdxpY+Tb28AA6mszofLp3vds)^#1u{L)^n8GW z?xfM(8&KU{rJa$BDV$FP*uA&E^7DgW`vH+L{WU9nP64*|^O=T3GL03r(6hq8?enRh z+doG6T1+qO!>^Jrk0P&oF~Z;F;&qs@*f6-4Hovd_NNIim#>DM80PG%x8>| z9qWo!YpChEV!Q|?_7R~*mq25C>tX-Fi$4n%@E?w=@k?L$<85*7S*9d^=Ky=>WC2C1 z@M}g$HBe&m&l}SpD$8P-ZRfpVcRL!k%B5Q>p&&$F94b zrC-JuR=%h1>R*lesKv{>i&$*Wbo4x^Wyi~I7}s-FfeLCY<*Sz{`Nj<*eMNl!fk2(0 z{VDH(7W;0ZA$jOhQpB{0iL|tfc86{7C~0Y_%HH=EO1Qqh3}ZC#RRL8YF>qd7A8wDU z8Y`zx-A1qtVcELKK0a2zZH)}%-EPY2-XAY zDr-cmZvH3AZF^Ro2R*nQE?H7)0Y@Fkj8{tHMSApCdrj)tI5Ky@-lm0w_a?1~0lghW zthFx{!OKTv0@)wasrIa9*TPe~AkULVM8~rfMaq6*CxKtA)c(ib#Jvisd;8+X{&_@6 z4ygMJN;I32_TRgJE3Nz(6OIp>gZs5*q+NWw;z?=ar0(Hr_vUNV5bx&Bg>$9taSBwN zWJlqu4R$H6DQ-%EYI}Q0s8RFeyv|hlmK6za1NKI94VtR`ut;%7?AgiNFCg5jc`f_C zJ@%{1Z`DL+p64Qcrv>K}_!1QFPt$Rpr}4AoVpu1~w)uSR=%!!y|2tlTaQ|0ULikUH zGJ)hwN2#995ko=UYjXybPu(O3?16%TgK93HJ-9jT#>4-xZO?!C>jr67y(C5ox(4?( zYL*2YVfzNo^Qmd`=M(`0ArR_jvZN07s_@=JcLPAo=`&*qV$S~awTYHt^@2cCjJ=w_ z7aaTXwBdWk4uNNEu3q23fD}VlAk*wGNkRvA?2~LNCxpGlR1ofdwF4@?muT1ZSo1eQ z5yxQ&`@DFsEI5dTC@0sW-wOx>i!(+?Z9~e zh~P2E+#-J+Mij9n!h7NVf9b!@y}*1Am_2cT z+s45cfF!*ypFmD#yc3G-8%01*Z1E(dH9oJ_QhX}d~O)2ESG2h|nbkOaVd`H$KQ-`V@HkFdlu(Wb=VccT9VcYOF z<aX9hQ?pU2nC1QV z1r%#?)<5i8(Ah;})@QXuNle|f$Fqe-iutv|4 z{ML%Uf6XaLdM>nMAG{IkF4@-w3S^fRLp_It=d*i0Mn<90=qtfKXd{g0z{2&R(VJev zdt2UBz#bm_FPCmiloX1Un>ZE9&Co|iF6TchFY_3$YpzTb5p#{Zd`NV5335ox3$o7w z0kMgiCx%zKM){rp*%$gi?LM{m zN0RE7K&+YE!6qh-mj3DbBW$>AJz?oOvc5JPen-D)J&2uXFh6|u}142;GB}*;rFu&3P4htoI+RB zf!co{T+p3QA>0!cDr+>nBL*e=Zm!iCeBG)YUn#Y$DgSr=A!1IdfD*C-JzY-}QM*ag%Dutu2y`JND># zkHpc(W%I8zJXWN+=Q8vipIQRS8NWjHtzgUY2a8tpPv!wq{J+KrTV&rMk}s!7P9z36 zCe@vl3K9o>w{pmf=(sZg(*I|3=vB;>mqjU3#ihScu$4!me7J^~^ru-+*=Xx`30mS9 zz}dU+mK#WSTKm55>?lug_xWua`XA%BYlz9u7%kqGV<>8V(|#_yd_opmCx7#d;SI%$ zt(I;rJpLuZzw{T$6;{}Q`07g1 zpNGL1*Csd}t-oA`8kbk%pYpwYyUV2DuV5-$FF#3l7%lps8gLUUhsv7w$eMKYXCtTz zp{H9|*gA)QoqW-*nd~1E4+VZ=>8L(3Hzo1URVRCC!f_#bDk#J6K&avG%>0t=v`F>L 
zIn|4>6@YJ3d?hO*JV%eyzf{b2Qz_7sJt*iof93{<+)Kai!^F0$M(lciO)( z*4+C?u@jc1)xGSma_UEFYo?e?rpCTB;be7l^Q~d=BVz^UOLIYKm%skeB-DShUf?@Z z7JMhcYDI1>2xEqlk#H<}I3a;b#SUL_>|n`-I^WS$i8RItkG~Om*YosDOidqh@g)(A zp+FmOwOM`|9_7am1mFqMz@5gSHv8sx{%6kC`m?(dJa9eQgc$J2I2O{ym~ zJR9^+Thb95W*5OjDF5TEEMSS{7$)bZI2OqL4rT486mN ze$z=S3OQVWPR{TAAK775&PMm=OGYqe=wx~3Ow*yVE|Z=I&uW{7t+5-wW0Xzlk6PK4+50xH0f7q72z;S?*jqzR<_I%~SK)Y6InVaO!6Yu}oxsC0C z6d;~IhK&Ihn+cAZq3kfvFcZd(?a~<&juL}EaTn#rBn?HA%`53cy~BgUI;$ytaKKV` zniAVkf6U?p(wEC3u79W$jugRbPD`o5Zt|$~;1{7H#PA_;8MsjM-C)1o>gpQx1ZO(2 z;YLO%Nv`FAbel*+M4y=#f`gdE3q$Sul??Uh#-(KfhmiOG6?m=>H&Ni*NjF)vz&w|u z;LhBU!cClcNk#WrEM;KIFwrEN+@qM9jvS z_z@M1F~pW9s8t;{a6aXF)Y9Uv&AV=fy!Nq{ZjGtblgmr+K$6pyxB;{r>6FOcH0dj7|IJztS9hF`=mVHy3Yo zUAu73kMeoV8WZt*>;Ur3#qKTyN?-SD3D;D@q7%vF&DnGL{YkNUrMq zKPICpKAika(O}FKdAe!%lR;-N#bT)gb>Y09ZPN*Qs3T@f!j930^*rcY^mrlw3O)I@jbJl$nGfxmYNbz# zA5Q!!m+&7l{%8De#8%3$ZJc&ZI9D1XDURcm$Q+lTk0!bT-X~9B&>A&^NxnKJoyVDS7}u7`gS}&yy4Xqrk?GGpPBK z$#QGWAF~pR66z!p`5-SEJ$-dp%{KYJj7jpg@h z#HYxk2C?P6$>DXk@{hK}t4+llFRmEa!3FTA^fBYo!!7G__67SNu6uZU7M5#OF_o{m zspxzINx54-@))9lg2EI1N;Ijme6#)(t^L0N^Sl@i8*%+{S(1Wf`ZIEqjU%RB;#k6f zMw5*-rt_ntU7W^P+C&^`9aAnRA>~X@mg5~U*ohp2&o90^GHohsE(~^*D+d^6^WB6d zxkqYKsh9|bmQQk3eHQ);pPOwZYClX($T6h#;XwW#ICrP@Ho!O>$M%Y&it+>KBjX=w z7Zx4jQV1ta1IkZUXuY-95Ul^M#fbzqKGtsdUE+l8%h{3p6P<@w}F@cT8H5 zGdZv!*U||>)&-Cy(GU4ES}#gm^uah{JSCiq5H0c&$HZ%tTJ9X0<<^PHg*r(28_HR# z#gm$MMIA$FCa47mU|8^;46>4=ILjtA^W?K|Y=noD#MCnr?gw?lNH0#GiPdOH2zRXE zQ>5;_PGRV!%Nbvj{Kvy(}n*9a36cyV|)XV@O=WKvjM)`A$K z&s1}tOQKKU$WDj<0#P|9=D};k&6G^|0&|(EDdp{J0>eHe^2v)bR=K&EnI(Fe$AurI z`@zmn^i=!p=`*ul$!~v09lI%wq_ForgRcerQQSMcprn2_L;b(RHUU+zV6N zoQ&`Y@ixNH4-qqVb8Ng0>NhQN^m#xoQnf`9=Oiw`F5m6-X-^p8K3$FY7fxG zm|~>D`icvl2gY*e@7UfjV$>?P}b>)W?L1Z=w=i^HiyCV(WdQ z!tb1m#!MVRMFq!#0BxD(WR2;fgmC0Qmedb+YzoSJ58P>CF72mp-qO>mM8FTwmQ#)uM?+YkNTjEQqzvg_DEeJj!_7Z8MrSkTR2H!Y>}=I}_dN39Orj|Q^Hm1M^Rn*MxjE*RdfP&kbixS-+7Jvv#Ep%*`O_@lzYtL#!HC%EW zd8URm_6sX4QN~qvhHq@F?v0<9@$w$O6Q?X2!pcDEqU@ILphq~YjCI`5SzP_6wlSNa 
z&yCOTP@0K!eRiEX>wUY9v2@w=8!jcroZ-`nF@*1R?)YgWP|i)l*DIW#h0;K}+u5_* zY=_u^Emq0ke?l>|&v6zQp!nScD-bSDwLFy@D^#luSsmxfApkDNO%%SQvgFr39xOvE zXde^S{=MMXQ$yK-_dOK^!onV(u#ubMb)QMrn))?vFnFj79hds?qll*7rO#!auS!P?cj;uC^uG>qr2zu2MIa{^ySt+Z7v)s(87+JZgd!T5>ri_)=6*)9wc$}v9dqLo= z*SmDDJ;7czxf}-*x7R8e+}4gWRG}wDiCZb}{Mk{)DS69er{Ha(>sNsb+Oq%id;4ts zJ5(N?%SMZ*IhL9odsK#%vZ(SN;VJ2~?YnkRN^0X~;B0Sw(ez>L+#9*LvV}g+`N0Yl z24+{m?CE>^l|&Cnxt1y)dvwmn4%~L;j7Z9PVPRn--~~lx^j1s2&CLC2u9&n-`KIRW zO)M)P9|ku@r2(#y!mlyV) z;7uRxI*;)gS!5C$rh4oLO})Iw8aR94zyZSxo*uY99=$D!N4By`RiY9RKFt;$j6ssW z9VXxeU8?K@RVjlvh$n~f#?o#G z3yB^KatSldp7Wtpu9@58MqKqI+2EVocBmrw8*}QZmsyeWHUoR_zZqD-!i0Ov>bsME zK>1@{qgLK0*UPu$JS^2|dC`5RCMZ|~8PyOmxtQJ!PsxNkU1>u`O^>_Ew@8j|)aY{8 z$KfdIwK5-G95Q6xfQ5YT`vP@zZoROIyil}Lde?gMH#KV|%1aozC9I*OriPa1oQHlS zKy!mmcR*j;ZX3{>#dI(yAhE4&*dg_cQfI*kSZ$!MqV3 ze)Crjan$ItG5l`r} zIaz_Fj=|~$15+Sh3P?nmpIe+oEe`fItjwo4OG@%@5Z@yz8udH4AVU${u+D!7GWJ~d zVrg$Lcq|}z6b`M>y!H4}Z=p4d>qmU2_#4CWbp78>I2idIUz)~eTSw^lA5;yNnKHkb zz`^KlwX+&N&oKZMY__sD8*B5zU16i7+Gch3ysV9{F(zWQi}!K0cP)}Cf-0ygf``F^enw*XE-_Gd{kAq6+vqQbDJWc{|NVAaXi|D}SXe+Ve=>{5#{ zcc#3UJYy&n-K;FR5Mup3D6!Cd*nc)3VZ;a|Rj_sJ-jewr{O^;m$U(jGp&ZL3r{d|u zMZ zMx<-^CFrok;Nz%?hKoZ2l|}<_oQ6u}zXz>d`oH<{Cp99B;q*eW0C8!@LDJHlyYGfS z4UJ=*X77xcJZ21+svzNwPR`Dp6uV=>H%%TB$!-Q#L#eyYtJjxTREkS~kLu~w8Dk7w zwU(U^`!pQBMwVPnVU;2dVV#P;R8dofnRr@m#c%q!{aP<`n+RHYv5z3ygy+`=3@ z75Cend*Npfk#6u!QKPiD_#vQ-m3MAdfTzA1OW}(Q!Fu$Eavqq7`su@h&){Wus1qlC zndzyl(^S5MwU$xVc~M>-KE$0Htt3w$eAk0O4q61?pU#Y@TWh%# z1A;+&<^7SKlf?WFzrJgpN0RcvBr!MZN3_=pj>N>PDbMxSX${HpXE|!Em2+~#PM`jM z(k-1jl!bL-^>PMEuLfhBxG#F8NG_=7f8`_yi_`$#AA9e_Ny_!$sKyUUdj{%QH#z~F zs9}F}@?ddxiMq!5SQA065oF=0X$O-TuHTyKkSLV= z7rw%eo)>tWO-FYuKbvCobnmNH6Qv2_u{(>)@b=P z-gB-7!;a17{voCsY&w)|=YfD!(WbQwc3-^yQ{*YU(vHbdr z9*SjIKp-eeoqQptIousFK!NLzx8Q^AK8Ld&wPim&B6Lyqh*l26q?E|#$#-s@`FL+j z+AL}{J`A_TC}C}qCn8^;-3kDL_!B)d&Yc&d-eb-#riFX|2#7a1WUU&9?li~*!R`Db z1nSGPHe2Ut{uY$YamBE)%kncP1dsM^3wmA>&07u+orUYP;AOtG?SwEHBgs){ei*uU 
zspVp5SLRkQzJf2A-i&{Z4`(m<&3tYi+O_@EDuks2{0GSS1w()E{Ia;T^I$(I(&$9( zT8`e72uK>z)R)Dc?yygD*uyV&eHK zfT6}N*NLWO5Syb(E)$%DZ8LooBPh8;=kH}<9)hy*_WbyN8?U;ZKMB987&!wDX#e>3 z&8PHEP@m*C|2v!aCl9ouKnmH68a3U1uuky1!@s0PNPZu79@Tg4S|fCYK7;uo(byH! z^QXrVts+p8oIiU&JV?&%#~(K7b**i#Kl%P7ihHtU<70yTh0?;ezY4nBd}+L;N3C|1 ziJ<#Z8P_^7LxPm@I9`AiRQqD!34X))4TSJ?a$sku(H{m zs<&Za(HsAGq%O5Uzkd2hi2qRYlXZ?s1zV1PMKr2=OS29y^dC&@LIv&He)vbwfeXey zg0^dMF}LCH%`+krA7g@T2DoJMLXx4F>6a!EpT+ukh9sL~I{$*MnE1kCQ1qtCo^<{# znWySG!T$`fDG!wVGOC8#P-!1IuE=X}mSEBJ{@c=(D|04q?- zWM-F;YMSZL{itC62Ht11iT?gwH+Qd5McHE*;*a^;RnYkMmy^8$0B2vs(%{}N)st1= zbsEBcZCK;x_*$J8p-PK!+85o{ddBeK30+PwyE(ORw`nPCA8g=b;zM%$xWUayoO<4r zlXJt*G1P{*6Vd?VH-Iw@;FLzn*361F4IUJ*#ZaTE6I?kHlOEg62miM4AC#8?I0a3Z zV>D(H^^U2f63&+8t{_Zw$KOX%rs8#6oV!|&Bn4}nIwczQfvCbB4eg3_=?m$FyB;wR z%tahB(53Ig=#5<`!g?JYJ7lKy`#;C2z_ZhGZuY8V>IOBfexAzvPN*?YhalLX%2v~_uSwGtU>hj|=8 z`^z?KKTEE~7@XmiEG$%hl;3)iSQzNA(9?PI=1oOq>xPL&(|S-0HLut+!{=oDJLsE7 zSb*NIob?qG%>TR(!`Sb44B)b^{0I1O=$RX>$n_o3BHAvN+DFaw=HxUzcUM;lY1uhs zl-={;!v}|%qqB?|DUbTU9{wM;-aDX)?0XxJ>qp&Hk#%jT6n7m!R0Kh!6I~Z^r~(2~ zg96e7q!ZepuBeEh6M9v8N9i3bfT2p48k9h&i3R~feeWdr{e0i|_nUukv&qbzTh2Y_ zInR0S;dsSH!suDvRJm3AAi8wIM7qVip4KjK=)U8e33w&NNwHy%vnpKH7cIO5pz z1EzB2$%1U|0((wiACW;KcjaqLEvu_mK3(Ff$Qmu^c%Ikdz`Tb+tZ|92ErsIyr9Izg zqA;O+hN|1=rlps#UbD`O&@diUg&R)NwFkChOpZVO1jz}1-2-!RQxf5-0jJ}CK8XGP z8QCOF*&9w)FLYGVnHs;ff{q{`l)6y6FwmGLr&az^;rf|KwDbFk31WZdAwC(}q$GoH z(%v}#^snY^Jm47yflXk&OrXQ?b`tM$x{ogL9xddyRj-LaGl`BkDIZnuI`h*348SPEIsj1JALuY0u@op1k z^^N>8x4s4Ct-CE83@?lsl+JTn|KKS`h_GZOn)&uGyAZ>+LjVYA+Z$@_i#ThQ|5?rq zMC3h+{;D=+#V(GR%w@SO_DXF>3VBC%d)FWWiuayA?Mj#V9hdgp6xJ7DVw)ql#I1I2{3VS2E9%{OAyzo# zDNM{6W#i(UdLQtWh$YKO`%xbNU;nU>l1`JwtBcSy-Cqa{`Pw&zw93bb`LmP2X20woNFgj({q$88eeA{2=w~>Q*LrZz2?rqKZRg|Nivnd+ENdp3CdPa z{@!AnsM)FK_UQ2J0<09q>!^4Jy!S(yXD{>bSIBOTxpHL%#~}j$i@p9@h{D2C>MmV8 zl9cw3fd}rtoA(TsFo+(q=`p>tTz@FHc3!x-FV1as@d?4dLod0tnu>O)`JvP%0YWho zGrj`_EVb>x$X|W&;P6R?Yec{;PXpaebqPTv(1)* zS{GWj<8(8OoVAL%WDHB-gD`$tQcxKJinyv@(F(H~$%mgk+BwZzyu{BvRCw 
zwWL)e)OaUwwvnDyHPkw^DuaWLJOZyYEXSJwCU2lH7#L zl>n1(G%P*#KVnF$m}42MK_Jh2M7h2HV@H1n!7U*v&b6h>r%iDpC%)Ct#kj)|m$3_* zOIpCy&>dTOx1;-Az4nk~xQ8I|6*Bp|_^#Qb>p!jru4`~bmBqgkkgiYE!>McOF;+H5 ztXuoj|M+Xb`L?uEWtKx=PGVWG(r&>H8NV@nzR_Yo!dm^dwDEmiS$frLSh{&!r>7i1 zpSz6BSTT`)#b)K2Mjo9BG=9(Zv~j*Cw7was#$;>cMSfAOC*cy&`~{JD6aq7jtu z_pK>8Z+)P&4aRKChsEYWd(B> z+CL4Y-)4~>Dj72p58nnzX#wyy!x4=e!0mKj=DR5`W`3O3bANV%?exj zh|{`&$1=j?Jjg(>Jf&gQ2{H_)KG2P`)v2B?j#NOQMt-EFt=e?u=o18_)w|S-za%KK zWk?ifb6p%R{)~R_iCJQMc0eM*6CL?HUr36JptgPa^1t3+yj4mlbGJaxkm9PzCDdAw z^@+!RSgyQ7C`Xj6hML4tbK&h-T#6g@hv;P!y{BrKMUtAs2JLr>t7uJ23Er$$BvH<_ ze^qd6Y0X#@7swcyw_*P1*A%;@i#_y6_LGEie!lEWe|d5wsvyc)I-AYNpv%H}8lB

)!mL85Hk`UuSHNCp%?uTTh`NFPReB&gRxbsWtWlRQJN`pSOsT};<1WlQ!q(qU z+FRXcz;Og@%hPh~*o$-jx{dac_=m85AMF>U(z?QWrl05c->qE5gLWo8`t^_h96I+% zC?uxL%;apIwn>tiQ`l%knSJoDeS&|y=X(FbOV9Gc)^};A3mCh1w}`LQe0LkhYwnFH zS-vJYY?@xVJWoUt=DVk|7;)a2(fn<0@ypf0hJz&DXiIPOU2KcL0(nEjrTK*Q5nJmX zr&3|rjHY8a@enSPM>QQcL4dKZz5?>wO5cpkgLiUf>}J2P|Ae;eL#y!0wyoW0IUXvP zCu@<0dheMWcSwgCg`;&OT#-v?pHTmCa-`9--*5xZL5~DH{4UrpKB~mISzShyi97pN zJohWmDlxJdJYwd&sTITiydmKb#OhmKe*^pDKMY0e?@J3+*@gPz>!lW((bXi&MZ7Wk zD{df?eQV=b%UiYio5|T^Q2Nidf2-wPq$&wml&pou5gaxJQP?*(N;yv6H#4ZEI&mhC z;&`QiX7e!X!#L5#G$=B)s_fz3#e%J7Mc<-?dHpPCi_v`^9v;{Db_8@_5{jdrdxiRw zDSaGwGeCPY6TCiDoATr8=W2NQB!)GB=c^yvb?A(*0UD|oZ;{etS8T|~$LEKAWYuGM z*(g_kCacE|Z&G&Wb8l8*aEeiGZ+wc+=eG}V-p(rQuO#N|DDs#?ora+kd*!O<@uOSuD*}5ib~@;tfcK8@x0__eUK3H z*+DK?(5~?XxMVYGkE=P>F!yd$6gM9X{TtJ2MQ}WaRruVypTdqga*bCiL1vNA-`D54 zp{>H#D-eg+i46~iM-aWsYWVgL5^^fKDN`!s>A-P+S|*bsbs z_3vE$%A(<#zB3h*N$FrZwW~)3Ukm;K+3*tmYdi8ZA{iYPW(?1gnsvRzGQu^u$QGbX~@+5SM#AGUP|53 zDZ=E6ot!AvVN-BpUuzz`E=4{?;tit)+Kqj^!t3$AgEbZ&8S?4d9%QlOxX0;%w=K|; zKHp8=8CXJud@t*M(Ouhx*2KooGt)COu|8iN#Xuc|Dmn*8M6_sWN=U&31Z`a~-Wd-# zI_T|ei7Q7x9VbhpzBSXw9L)QmMj@hB&&7c{%{~+7v6O1Lkh1jYV`D?B`*)4tg0otW z>RZn>3FW&X?@?rWzA5j{{Vc)i?`o&Zyssyw4WM@WM>`-tVP*`Twzu$uSb%qUc5c6@VCI~WO{+i)G!dNZP!q-ww@=5KQbZ^ z_Q-tAlwI(_^SyB^y2}M8=(<00p8XdJRb?SNMh?e}p;afygV}pJx9_>a%!Fo03Zi5{ zlr@pq=-j^zftfK-WMs=aq}f3J*cRB_+tfh3hfU3!*_Q~I5h}jT#@iFcq|~G z2?L|B3%e6Q2A3gxhTN5-UTQ&qyc4^|XBk)dm-6#J>oTS%XIoOprl8}lsUQ#cNU|+G z&WQdF-lOiK+mvsy8oJ!u2AHr{gJyaUrc?oA&Le;1z|uLR4FA=sE4{a`_g5!J9>^Uo zOS4yFq>sV|2hJ1fyBS{*CAw6NfQd#I|! 
zUL{mIGugGuzs)@9>Hs3?o0vs8$=Jf5`37Xt9NgIy>lf9WEQNUz%NZ?1XCtVJ2dr=G zsG3n^v_9OJl8jbloR#+tKCm>5&Q%XeST%L;Rlj_6Qq4uJ=g)@M<8Z~86z^|K=_gR8 z(H_bWWK04+nm??cw(34SyXc^kKXgR4)tyl~*e*vK+eDqNn0%}XoP=SwlTed-VSkM^<(O6RzW-iO&1N>~{s zyKmO>y_jo(U9Jb&?-x52T`2=dlPah!eEz9_(OG*xlprW8Lwd z!a}p62dJR#YIsUw5RjE2sC+h%^4;B2W9q18f)Bm6)Ovhbud8=KWAOY`R_i-6WM;j% zv~5l;cPvK!yjG3%!qeE;jUc>qrfcV`^;jNv!lTZF=U+1+HPp9s`dD3ALEam`GbwES zj?0+-IJkt8#&q{1f0e!9!wCiIK9pIihH7C6`Ee@+U6KiUQ~Qy846m!^nuqCju{jv^ zBl*6nK|GtD_^it-|MqzsJYphIYJgUga$xO$igICn&MX4n>g$_!ATgni&BVU27JmtH zBkA&^!nZ-U!Rt}f?`XfYkl}&8=B;&zS7%ARqTe9Nf*V=j_E)4#(!bN%0!2hU=DoU%7 zDpVqWzN9eWiJBzb?D$uraAhvvQKHfSRYZqATGESQ%{&Gpo|<=Y;+7!GXpPve!ORwF zWS1Rc_vFc=Lm&v*&HOO|83@Zl6_6(+=o4+ z?v)n}+p7XRy27c$a80Y9Be?H9N`3BTsK9DN*2Wy(53?bgOAavDSvWO|>pJG04b32ub6fH9#CB*!bnR38Mt^_5wmXEotd+cN z67%`@ZSq5Y`eh$AOAtY#E?O!Kh^u-%03G5nIM01!q8X<1z8wnH*}vMNNpx-tUj8-# z8Fo>lxFfbWd>Bntgqi!MbnJs9XAT>r5Z|!LM%c@!pQ{vN zUGv*mI$TvI44Ww}J2!;y(%rwr8O~I4%oI zc#XFI|18-Hm8?ssv;hzgFuzw>3l^;$SqUs!SwJA}AF*`ws9n2db>X2-!+$VwLhtLCJSnW+R^D(wKgDZ} z4l@r4x<>X&%@iTiABeDIo0Nj3d4(<1%#MG-+*UQIkgd6}YZ zc4J_zCiW*J?Y6G}{T4;)y~X-T@{DfSmSJ59X-U{AXepq1r$3ss+eSaA>VEhlMAk)D zU)5}6LDms12@#f;gX#@yu1ZKFQuXDl4MK;1nlK*IK~y2FW+Mlx<+02YBc~gqHmhES zXsKYo7`f)~>XA4#wCaW4OB?(!sY1fFDR^#h#cnXW?U%X?td$=>4M3HPD*%hc)}Qzn zPW&aF0D$Lv5MJ%|30WMjsPgEvvE(V#O>hc+ja*laru12toX5r;aVPa#kY*#2s?tHX zUV@d(5PCIfNcw|`7CNRI_WRnO%0zY|1G{hV5yhtFO)|+iDfwOYC15q_47eST7GMk_~271k{0*?;|PvN!EbZ zYb+f#(ReWQ+P`+^jQ#|W%oUciA>r;NPmKb%XXVBy?+E(O94!Z8Xt|i7qQD13a1FH~ zkRk`fxys{Zwy7w`mIf`R&$)NI5-wIb1a1Mf17tCnkE z5PPl|zCLmGLZ$upr7E!W1K!rnC($8YgmgQPbJW(yw9Uz?fBK4xrQgOFJYr}H9JUuX zV@W2G@B6o1imN<9eyAHZIYf}?s~xr0AX(63lb)sb|1G^knC0%b;Y3<}Ww}V*FL10v zj=U7Zdx+CYoDH{b*J@1Q6R`8lhFgY0OM4_rU)Fs4um&kRBkfI4Xr9b2QMZX6eE(Xc z^qR|hUg1QW($oXtyev6x+&-)#FW`-ooLJ0(O-Sz7=^T!q-|csX91i=3QB#62uFxE< z2}*>Sp>Wf|KCffsWK=Bqk@v^NtCQfH_EhkDKB-*kR+UU5a z9=z8;tlXR4?)0N4VZ(jc$vX>Jf`88euVuyvkdw`CAISlGDY(qvtLJStdI8iVC|v`e zUCS0zMu_cOFGa0Uz%F_K9*Ih-qg4nMa{GH_7`|zrgcZaJB 
zTCIEf?|CiE{K}!XZPA~by^W(w(9#!fCkan0zp>{&mr$pkrF>Op7urvG>dy;MP1|cj zT{d^IEdcnl+R5{5LlS`SVL|{{)bJ0c*#9wvuF9PIvrSV=d!(!1k{$~CUrQXmj!jGd zLWvg^D6a|L!_c{X291lbbVaSxeYIcg)A7ABy$l&h6aBF_1((gCE>c_+)evq|WA>o^ ziGtOOz@7B^QR-F%zLj?3i^wTD%S7#u;{3M# zeY_^5H~Hn)QED}OMynu0xg4B4$A%V1kHjC6lH(BeK5gO;HV@K;S+-~+(3ha?FEXt2-czu|QKKKy}xh!;(=aVkew6Jc)$#0@bxp#a} zcwr5xWqTx-!B%ZPn<+(HT?IYMgtXMOzq)#o`cp;L=gP9kLvie-qZDHqxnH>eSQ~~0 zGY!fnRh9VzqWJsyuZ#YceM7Gu$Hqdf@SgRO^5;Cjh2js74rHVViY5v zXKZ8|bvA9o-nC8oOzs)v15Z)ctZ7!rGbmcxy407J^0I27dwqF>SUpyeT0ZYhY3BC+ zvBXT8z88GsiIW-KCo|EuutH`2tJW14E5D6I2`^_Ybk!6MhnFu)e*BN8E*d|xvJ3e7 zTgTe`54dF;Ao+B&TH<&&RBk=xpeWg^ z#o*IS2%m{iCt#n#N0;U3MlP=HcS6FU8R`6(fxf=J*KYO0k`?KU(aepMw$BY994x7M z%U|Ey zet%JunX_)P_silsl<4F#v_^`1Mhz2k#cqmAweu@}c;MjGuQ9vZ?7O1+tOEE< zMLqv@f6r(&uR8OTXu6$NOuXXYB_+EeRE{S%0H|E4e&wSp$~!>4-uy89UuX&o!vKU% zh^NMin9T^3p4>l2lbuIGP0kZ_Xd0C*amB9lQ@w`e&Kfj_QwOyMON#B&llhk-Pi2FY zV1{kcSKs7q(~aqE)VvkP=;CDjlTT&NV^nuzeN8pghri5kBFtSkaH0SG0sF|d>!nuX zMKw>2g@=h7qJ@F3IgnwcReg~WA=hFm#>czF(;=qlcw>};Z2^LGY^fVPa^T4MlS=I5 z`Twk}@I>HE15GFGzMj&TpXBTRhMy{>^iaM)hthwHu9)vygSZcD5o~P^KrQ`I>GY~H z+7Bo(TYw=ba?tBYiyFX1TvX7?z^|)V|LtoE6VdIgbk{ApZDQaacGhBWf-72q=;Dp| z_+*EGMK|_F8(x=%|*_#LQ7SKJxc;qSpb1238`zn0%9EOZSn68lBG^N6SdWp<#^Ik`lU z)qFq1w6mk!qSn7YXnZ*Ppi?Xo20c__v6fn6q+GA#ztaZ_Y}EQn2CZ9TrJd%VYspX4 zeC9m`(<`m49t419auog@JPCx{u>e1AqIoTA{-Ji+_qqFlvDDf-LzIioLt|~!)CCG{d`3K(AAT@d-niBan4j}4Cc-VBOHjXNwdW#ZH9fi0d zB0?Uz-xqwn7^8FB?0U%Oah-1Ry>O^JV;Zt(9`Rws8#`44h-7!ypN;g`;l)qo(>xY$ zwQ9V{8?&{Oh@xnh_0vqL;#00;2bQQ6Y6pd3{*@o`t{={>=*8L0{+OyI#LQLuAuvj- zU9M}1k*@yok457Sip#~Gy(nPMi?;jV**4@iX;WvXbvKY(MjL0P#oB#8kDbyy=Mgvm zw7Ih~*Gs;2iFwJfa$?=Y{WBko4lhN=zB60>H)!8eK2J)GrHX&QqDa$Cl=^D7*$@fB ze|j!Fy4R3oc`-=&`XKZ9L^@oT@B?WUvQOLayW;)0BBSI7w1?U_;L_^@ zyT3`=zP#DNm*3XFHPO^C2%p6!{{{yFxSMf23NJ?frg6(r%|$;*;>7_$S2l{}(gJjN zY=f$R*#xLn^L~MwJ{o8uu=;Y!uRkKWaNcbudydxq8r$0yZYVEuf+xeet>}A8w2^!8 z*&9to^o@STbON+1}srZq17--s!iOgha+jU0}l4lSD)+H&WumoZXW!oIs!;e&iv4I4_@3 zD~927ghWe6v{JGNJ%05D4q<&8D|jTX 
zWraxG$}y@rU7F~o({mPV^*LUuJzK8mR7E0d1lTP8%i3_>IRrV1doj#)nr>Sp18R2u zDHg1653N`pmj1fRcA3ntZ+71w-pNIn&p4nhN)Epiw?IVIim>t`qbe-9i zGfF!P@8Nmt+Wz6v17(6bvj!l}5YZR4TEKib^W8IZa~d>io;n>#Fv85Sj)5o06K7$R zbamcJK~^jTJWMl*rPmNcqaf?ROLK+T4#n!lNyAEq;oiQ=;_1^M0hO?Yke`pK!+a_} z%k5WkY?Iy=ys7-YyZ-xrft0H1>0{uLb!DE`C@D~81vu5D+x8Nrr^%@zO2Gz5Nm&Z6+xQkrOdMxlHi! z3$7|E+&=iDQ@Hx;U3yDp7Q1P8ceOiZaGIvXEk-|loihW)Pj5O06OW*D!TvbvC~}T) z0*jN(Q%-}B+qV>8YUjznfn~B@NuIqgFl#()=}3b@vz$mVE$cGMdpkl&RY{g~An+8K zw0klcKEFqzFFHk^!f72YOGnK`6ZM4pg~^UEJ^$QZg5HwC2{jHAVRx&4!*LMQ zCN2d$liQ1(|2O}KW8GdvkC7B=YBm-jLf#n+maco1qsD9n0k^dBoAx3h*Jjdck&+LA zY@&i1F04rblq?5w%3vTFmh|7?JT>hrZa0IN<8OE)L5k4_J#EJ^Yv4v}if70P@R<|Q zERZo5k`*JT>HmB0IrA|Mp&|_%^Q%jubDP1nn3IXj90BoZ$}{%%@vQwGe<-1E6GT;M z56JH`z~tO!k9hf?`+p|o$CeeH)7N}7n+_ou>SQB4NXp@PJH!H0GXxUDyjgove;!l~ zmk9P7fpErOK3j<}kw`*#qo@T?`{`);Gs?vIX-Gi5!0TCg2vpfeJAa<&$mv$|Rk>4S z3RHfaTqEd;WI5ISYFKRRK-1tJIxk8t!eEuGTf2XFUlIh7OGDmWn#$JpMD{O#!jP>w zFf*lWQ~-QvF0+iT-hNc*l%dsbuxf_d}dY> zwV3N@KY)28$xO)l{Lcu>{C0V{f29_hF37s{z|j1fuV2T;@XV}zyZ2kiLXQNG0m^#w zRxBC*_mpO*)M(9u9w^iRC}4uX!Zpx?;Ic91%tVA|XDWazaJbyk*W|I%loLM=2W^8r zH0PE1z`T!3+MSU@wFbB@Hrf+_9$zjt4z@9v`Cxh8A0j8A{pl$`qI(LiXKV&8fnyzT zM3}<*{rwDqbs4f0!1F{)yn0L}^Ng73@t%hAPCg)I5q0O*xw3X^7MOqhj-#xO?+_ns zcYsqGIPi>vOfIT32cX0(4i_nTg8Ne3ijJTCEeljD<@$>mRH)E(y0mqtzlWB~RX@at z)DKR5Thm4jB3i0t$S)UY>n=ez|H57jI88(qbV+CmE>J<=>{J8!FNfTn-~f|1-_Mn4 zVdo8Cq!IkIx^p$F;b+R7!&a|*zt{;;)FdQ9WuX6c4l$?UaX74`^uFIfFgo(fR@ACj zsQfSGKijtoy2it|dX@~NWhb7XRn;{F{VU(!L$toorEg&yaX>aqNYF6ii?to6N#gvi z8qBig_0I@=CQE!5u(ndk05dRVV+Qta{^yQ!h9_VzXPYp;S@|dTTJU5l$Hvuts05Z` zetx-E-5RWR5FrE9pm3)2D0=%;^q>ftzTjX-vhbHAZ-EaA67|VLXJNAhOAWMLxIF(S z8ulXGAEiD;76xBdCGNo0eXzeaRQM8$y&Fm{(^M2;ew)o)up-AiM16A9WZs2hQQt}q zjYfQbHC%J{eZRCIk#I!pP6s6rFgO9mPG=8|eM}2q7`1g_EkClQRkPqE8KiYls^tGb z*H(KpwLiFCL(r|H+W>J3gjPyU?*ICMuV$&M)c0FH{bM7%N|lmce<2kJ{QQTd15&=siz0yuoI!b-kCW%#6d`L zJZBGjW5Z{I#KX&3oRTPqFElVQY0KwNkt@*h5hy(s0NajU9h_l4W-Z9_MeTJ>)b^bF zmL$yYN@j95Yh81Z$bG@bu4vRZ`N7l{oKV5# 
z`anfd&M7@bq8#{ZZq5*sWlDo04LB!T2_+S4@Dos~Xzs5P+gr5?M9Ep_ka$i5geq*| z5SZBqk3_#YH*hoU{e9P3Z zZs;f4;aDu@{I}EQ4H0{1?!JsVZ}o5n+r*5;vdu%lqPu;N@@YyGH3|Az%Mi>5mH_ZT z_@b#B-AnClqxFies7Y+nC@l;kh{{fEfzyNxqrX^Za{9(%Sc~L~D8eeJFo+&^c>PCn z6)X3lvap!X<&);_AXXi>@{p{9KZiwZGQVckSW#$1SbT)?CCd3XXM51F!)Uyi? zx4;UQBNH%i%|dM=A%U6;oEFWeD7MRheZ~z?AWYCiVMEyuEd^MH<^&l%I1aW?Hz}h# zH63rij~V4ApFqC$pN-E{^onbP`Axf93HtQUSW=djVxOY#LlaK)xY&<-RUvW?bI9uB!nJ|fO?*r zFRKd1@>BtqH3hG~w4`Uki$~tNBAi*V@0~X6ADtpk2IeK(axo_Y0?H10-baS|^y?Li zK@k<}Vcw^XF9`=uMR-#Dy0&(qmK)W3%w+9*TSh)p`@df3taNm77|9{Va>pT)L0HV} zSjG&A_VX(a@$>XyPOnY0J6$^oipvSf^U0TK+E~MJ^8aA)CWGH=g8z6rki5HNP3uP~vPk(ZTt|8w5 zVtJ!#0Jh^;aNHOf$p?6ponLIMr779O6LrFpq@fxivX%aoN@RX6e*7`=(ZDFDq3cC9 z9rr7>awHmUWEfTRD0Q12jmP%MA+feE^-oCWSLZ*qXJ_pZ@ARLKr9fmtZ)wSEUlwym zZJ(JjU_pAcO7$s}4TezcTU9~+32=*; zYnra~?SYEi*=ki>L+RyRZB8~saJjTVA ztKo85!l1{lJvHN^PO`*~BxOqV;()zVrd;zCxvzCw1&|MaKCGg5>uRlC2h7;(DyUY^ zl9pzysF&O7w?^bg!%?TzaUez;6aMJwsZgJxG5)n7n$nV~b)0iUV_!a@HWhGRr&#UC z`aEF6mNKHP69E4kaBA;4z)A-0Sjz);&XK5ilsDpw;{E)H7XJdl#{tgcldSfE)ghKj z&SYCvHkM$>m^%D*p?5!{a0~A1v&(%fE``m3-bAGv(v47Qq4IN}`hGQQfW2Ab92m~< z9#{geJk_{(aRMZHlWJ-S_ZyLoYZMfyv#D@<`D**wp91IFS8~&5W-2#^T-(>w!Fvwa zo6X0N%vJ{a)<@Z-&BPvLN{!%s1CU&^wZK_n9xIA7grWIxi2GWU)q6f<^&2Cn$8N*D zTU#aG3=IDFIVWVr*3Zw6;XQVg@}nM5LERjBh?hH6l$g(vOt8n3+7;$oJXXxq&}fc- zqkdZr<$abSNxlcdG28*~YwgMB@s1-(_8zgtuGszz9LU{2Dms$Bg8+LtaR1CHx9^os zqeK99gVnr;lP?zcAAW9`55;EfVXuOZ+tX54X}S3@^YQZfl!*W3!4L7UBa%fK>7mMs zmaxBr8yidkJTxMByd+QvWobA6u_g4APpB7dj-m9WHQT%J)&_?570&oeu2Bho;~j&` z-b>3}KKU}~g@ws1v0u!qqLoJN`%rMsR=E-Sl;~ynv)c0KjHPf6ajg4Kf&W71X@ub7 z+H-b$@rsgGuH4&@AKD;2P!6b2g07{DgNcc6p7?l^6mD21al%iFkHL_zkl{1x4p>P~ zVr2<<*F~vZ#?l_A4Sf(#l#YcWcH0jw{ir$yY(eb&*MbCFy>0B@4shBL@Os8p+(+T5it zOr0L1;~t#ljwQPbAmbg;(Q#Dcg*971*)Y70ZxaQsb-&Vwp-!mQ|6g#*9`?J%LPfGH z9HQo&yF>-MQk_ik?X`l;(%dqO#Q{th55LsT*?BD-?(!f7lKqPeMmVfR zwEdDm(XiP4L!h>J7kXNwz!JiaEe6-Vx6$yTs*5Y=>fRzD;P^UV)Tp|TyA`yKcx`$S zn>bgnsX#s_=U6B1o_6BXl$wh%d59oP^g|usk>@F%&&p>I-P2K?_I|Z30s`8c5`Ocf 
zY(d#mWFOtC#@DXsc(@SQiH9}#z7}KM)xc>zE*X?AgT`ENm(xA1&qjwn=8eQt--BpL zyG1g}c&5~z1U#0JixUThi9L_71HHi|6hwwYR10)bfv^@oDKY|;XCVh(;+lBp52-lD zY&7^eQA&zu(y(m$7*3p7!KBULF(2P%GiN1k$9ep9Z*Uj zmFM@A>Ln+zHvwGKqciF**8vi-Dh@k5E0;Hl`1Qr(__TI2bEOxY9{nxy02~3aD=Z?g^ZdsLXCjTyY*UR0lzPVZ2P*R|!Ykas zus5$9Y5DZrN;E=>DXdmYnfGmvV@f9dhM)SE_ZjfE(aTJ&>XtbPMSz%#;`rqx)f{+M>JD`kYncL@B5Qpd-nq;W14GW zA@$)-jdvpMqZb+ldp>WdG6((w4|}_uv{ilII8+;x@+bNJwx56;)^AZ$^lW}yBa7ZU zXv`QP_GQ(#y2cVJ)`<_Xa<&|-@?~_B=7dYb&KEJyTG)NK6e9X-BNtB$nWMZicSeKm zx!#%XN!PjhT8TUcbU4(-6vwopJI){A!yfVTGA~)_V{EJ#CRQv+Mv4+%xm^GYAsvG( z0`^I7(thevdaxfweXAi+m!9l-QzZV(j>uiub^%?#*5rc=`LP56F}C;njT2MKLH*y) zTeJVx6=Cl6e_PuUmju>M;oD-kCf@QQsJ#oYy&(=(&eSQn5Bq_y#+RwEWBI!KZtTZ* zlKfAS8A6~Quj{`$A~5-2%()#*)f2LxWxGs zWC1m=MT7b6?1U6VRCTX%%)(Al=Tg|XXU(TD zG+0>+G(prKKR+KMV0&q~%P3Q+oYf|lcP5^?#_i5;_i5zy4QJGMWd~u($u34-JX+ys zsgvVySI3C&ojWRw*3zZK+d0W4e7<*_%i}stxtDqz{@~nf64>%h+$g zrFp4cZpn5S??DfpwQcE?e*#>yuqeP%Ijr7hNPP3A6V|8l%6Pr{{QRMZv@j6lp?Maa zlU~viYJ0O-vz&1MeyZ|3a%ds0aQfbT5)kC!w*l($GBVeV%Dq}&N@cfAD@038&rAd3 zRQF@k?qZS=;K_B}R|nW8H9cPnAUmpmwu3OxDU4VRM+in6$ID05sxlv3+v$7x6nhxJ zFGlwp7kW9+Nc#AfepWLQN7sM3bFww)O9J8|YgoyAdnvVoRJoDRu-FihwxfbXogJqM z*f}^vUvq9x#-xtU@g)>y5N($R^bYaIv>3sioo>cUqvqvq4POf7`|^yWr+{*hHV2R4 zEHNOoReA$u#dn$pp zrah-rtr|$zeq$!$KP7IRdD14)4V3)fu4dPKtpO2z)%>(ZUtdpnP{@BSIDnS`YwyVB zE{+W;xC5y8eGJtmP|Zrh0;+ z|M${UH|40rX-byriGdW}t>+i9uLUH7;^I(YB28v~Q7~Fpv5n7SdA0!o#|voVt=vq^0W5RpvZ)c;A3uGMsLUSO_p%vwOS zqZ#TBdkFWRZK$GL8F#N_2(jvT=0VD_0xTWgBNij(C`01}0PEOSdg{nJy`pX89T%Ts z{8pg5Zmy5@$DK|3AzNq!e6j9*7-?0XBkE{=(Ccs1CumGBQX`Fg=1!jpre&*5wM|v;d@GKDsrcooH=7y+Jk?_CfAVcZ4nA(IC zS+XFBi?c(5AKcmb|9NZn{kyK1qJ4-kQ4J(icu#T$DNa8kMdwxFF*27$i*Y11GHYxX zS`;e5xQcQM_D54=#}pRZ_Gz*m4(uLRtALeYC}F6jCiihgtiT%kxSOL>7|;kZV=7r= zOF~2+j>5*M+q@H;5N~OL&NoCYnWHrj5=%N4S~$Vz7CM6c5?pa+W+m%QdFlD(vv2O! 
zO!&-9t135cd9fJibdEx@eD&xUuvd5U_O@+Kc011b&2<()oXAPYFpN65Ifhd&1OYf> z7)eb8LdC)5M|DK#vhuJ6I-)(X=6Ujg;#%<&@E&aYgOgNK)15cB*R^-Dh~Pg7v=4P} z{n+l9i|krDWL+Q*{_-x^YO1Al|fB|xs7I*P0&(fA!5PsSU& zMZBc@ejjN-%4gJe$$5K=t$fm2CG-8?oA?FLZCscB-}irw9W#VoaleRNVX~%w0J$cM zFyi<%Gs)OU%NZH_k*n;L3BEB}f0xp`B5d>u}17`2h24hrPguw{A43xOXyFB7;|gIVL;fwZ zlxAd1r|n*ti}=>VS0tj+cr)_4NhQhlZ>BJH-n#d0n>2FqZ5;bs*g6h7YDipn1Dkf5 ziAea2vVwfqcOGE`RjyHA&g+qt6CHiU(OeD4}*@^Rbd>`=szR!uZ&0%At@;ficm z?zC%sV9_n}s%pt~My&5!-FZU+bBBe}PQf$JW;mX4Kn!OCZ8`V^UDZYj7#J-e##zAX zA9j&))Cx|KX^l~(5P4tL*nu9_apJM?zNYgK%m1kaCyK$<+0xRur1O6u-MNXxA?7x< z9s8GBG)?^!-R0#;?uc!pQB6sBYj0c%6?2tX`CjVSo?h_+3LtvjWu^x z@ija5BcyZVEzn9xri)0Z1b<$fii(W9u!^dW5bcMemFi(0$@YJCr=6QsH+`zFd2*Zl zJE??GzeBqZi}8P6)M$)9jI_9td`HXX-_;tZtRF4q6a8iSO6p|#*kqYfZFF|ww@JSN za5@7BX7JwK)`aN-yl8)7DX&56`&leLG`|^$C;Z<=$GimSMWmC$uv&3S8gmQVR-G`q zbTT$FxOi`)JKUzu>;XqAxO$?DG@TEKWN)CLDXgUUI%;#a;JOF~1g+uYI|Sk@=juw* zRJfJooUBz=h7=zzou88{avfREThmyn*wZ%LSL|e_;;Eu*?PP7yeu0s(l5bML{IYiG ziI3{mh4ItbhTRi;+uVC?=9c%iEri~oQ@goN(W6PrNAi?9eQfNsZ-oUTt84N2H&RD* zM7ABtiY&b|!s99p#BW{S$k>|^ewqOwv6WYYBvfdu zoNx2-Vx&#vt_V1c_}^Kn+B&@BD4rRFukx9!3iX>8Yg-%D0DIHH(0+d}#@PpF-PclG zwv)PhZZ7QIWwEEva{T#Xn27a@%k~2KCdrH|%8XxWN9>ChuOy1Z+BmmQsy61<+Qj*^ zWbfR&dE4tsSms{q zQ$1)Pc2rk{W_zW;_1|;Fr|45!v{yq(RTu+c=8D3Fm)w$ z$l7zkM$5%#fJtj?acWL#(IVs>S(MU7)ZQWK*jCCj_%%tFlz=)>knv-L%$JP~b25NH1NT@c;dLG(GI23xF(hHc6i8X` zMBHa6aeiZgu?7x|Pvz!)FHPrxO_8rf?!0hBl4f>C>`>!o44-2-H}~-k*AcCnn~oe@ z;+3fZYMLkY&MLyoo#DG&{m>9-hpwth(^SU6Szh&W@0x-)-(N!2wntwFq$Y_X*Ry9E ziPn1{S#d%8QY-Pyh@Bb_?3rW{V|i9}_qgt1bv4!KNXv^&?;67Ig-1*~TUH$lfBIGc zIVvWbN4=%psuK~qvL3;_PVD%HmnGLG;NoZ{ky9qjx+*unT$Fk$+$GVpC)w}Af`f>a zk7|EJ;O*whw6E=TExM|y*X%N4*66?BqF#CjnmV2208Qd!o1GX`fqBB~fFi}wsg&$8 zAelt}AGY2*uBp3kAIExI+FOzKRzXFiy$ztUWp7(67^VV>Y%Iuz2w@lk9IRDaW#|ce zBLX5zwqd)VVak>jRKgx9VHrmFy-&bC-{*P#&L8p;&iu^xxZc;r0QHh14)nXo^mSF* z0O+Kc8f7xRm~0;o8>lonO-%7zRi50RS&m*AA9%a zVs>+7!Fs%L5Mi>K>A{|76>8mlU1_N?9usU80R~TrUr*eaAm1|I?tDY3;Rh4a)0VOo 
znH8C?AfNA=s9=7iWK$@sb#tB(%}3afeDLR7{F=u3<~w*|k8 z56eCDi#_#vr6=6+whv7NvQ0jfoE)1To77!AnJaAdxjQDN+l!etd?Lxpwk9do(QsMC zmKwN(>p@fJXdTR{l_-|W@l^hao+D2$^~OJzi)N*y-zEHlAitav(IXb96&(IIkA3+U zFXCJ7K&&^onA((;({2dLH==(kbM@X833r^2+xw0r^Uc9lh-F_4BJo_QtH{dD#pHw|!Eyc`z8PZ{U6$A&Z!kMSg&9jz!+yBY0H{-v|%K3VKnTXgZ6?s?T@lAo4u zOI~@k8jYE4!di4eg-?w=iYk$(oxuC!NI1`&e&(O9E>8@{btUFcP28ZzxOb(-1St@V z`^>)H%@g+}SZKO!BFi4whodS+e@Z!@<8gm&Zacn&$O=p1GaV<^@%tmsWNXs^zME-M zEPutjeM`Ss9?V+Rp7jVv1jt^tk1T3x@F!!!R=jv^X_Yo_uOQftNX@!_t9kIpV0Vgj z>5gysZ`#7`WbGuRv0Pms7+DW~088$Tj;vrS0Z;80HD$<+55G76G{9em$V?ue%t^Ew zQMkz%8hGojZp^4%&RlzWhiFw5+h#XFEhTyP-QOdzztF>?qs-^!5gTV2i!RdmEBPuJ z+agY!PvoBPMcxAPvRx4KO`Tqx&Om3Pqxte-cRC#>bJV{)YSr1|>te~>U>jQKsY3iZ zszfsS*VIEjqkkMSeXchZkhd#>@s<|S2>WIa zeL}~excDLC^MpQMjz;U>72qVnUInRG_z@caX#nI4sHfcRv$$Dq8*F2!Wrh4TaNxywc1EEpx^+{U}UN&?6KIv`~2?+o% zHk07|JqpI#pg#L1<^dN2PC^?-VQ)TJelIs*?FcLXcM3Txt%r za*d%s@_Ck;N!hqe_xt>JQtLV*Vw;3hruUlmy?>{W6V4VPF*^2;SDA@@boIi*tr_yT zkyAbnxUYGezc!WT8>%48-&XgU)8a;EqWQo-J~G8NaX=|+>W8uR{qW7s($NvkXx*mN zv~SwfLj|#{&2@*CK^4bp=?pBa-(}jL_ZUGsr?AV?8jc6s+wGrU>g>6}KUAb?xEQ1% zjraG|yx|*^ws|Au@40-8rTuGOk->_U^r`?y`fpiZbs5MIg(eOGP5YTb*IMn8%jy;T z*cS9rXVTM`w?1k&pO_g$c3OR;8QhRNRq~F%RYFxNkMc_*g=zY~A9N<+& z?u@K(q=Dwgz9 zmA?%$z=GD%9DPnY_A=8u2-KX4-F(s`y=T{K039mxB&n%<-cvOWwsk|il6j+*No?zp zmhsZ-P-Q z*n_+>?>Rek+0F=c*5EXG=1WOY0*jWac2~&sch*s_(>?)=jXHZD5ym2KD_&E%LdT9t zYEEr55478mjVAC?Cg!n>5$O~U0iL)W?~ljymG}YZtk5p}Ik5fCVI3zcdL`TBIT2)5 zA)pY3D+CwS(MxCR;cyKe;BR+vb|Mo+6~^0nnNw6o?QuR$iYFsdi!C5jM6C#y7$y z-54#Y(A|DYV(wPgMXUW27uc;=wGy(9wKQGw0=gUCoP(bw<9|t*FTFnToaoG7MNeRS z+cPS6yQjH)X}qY=+2J%(IrmsTRh!UmwwZD?=Y{i=@X1I;R}68tKgwZ+0Mwy zOOI7hIyR`NfAySH>w`5tKkp&0Vx5!9qwz|j{SV8J zCA~W#JOgb-kKgUuEJO+NelG>Uk1GQ#JA8qj)qK(~Nn?AhJ{E@b9(%=0dc_*54|7J?mBc-z{y{TgCi}~D~WW8@ikfHw3 zafO3G>i>S@^{{TEb4r~8@#r+Bb#UxGQDr?KKn$N?l~^3)=Z;AdNUbuBsFfDXX`&#K zoles|9rl@fB%u5^a`O;ddDobrqA}@ML%2n!io- zdViy<(@H6X_%?ZP-j;KA{q7j3t0D(NSXR=p`gS9qTSc{(e>b1i@R>j2o>IQmv^Qn_ z-JRy%xjM(511s9Wt)?rfH7-6J!wp#wBV=q3HTr(DHYs>LVx$V~YF=|L 
zXiFfPA3~tdJ};HosjG#IH%mADcAg08AHhR@%~?Ysug~GpXlI!wV#wOJd&Cp8IP;sz zS#<BKLRQw<;XLI%1|`xVZ^Fg5YT*2)ov%Z zW*%rgb7FXN*^oNt9-TTS-)0*nVLRr`{sXDox}DhBr~Z)fS37Itw*!X!tHWy-YBw|2 zDY)h<6>AnpKfNR?gJ#2rsK8Zx@%*nJiB&(dZA@>zCUIK(YV5p zFMU68&$am_F8qWB{m(k(0y%;qp}=5ltLt8_K5OrhzuL2;QjN|Hu&Vk)Ub*k)F|hm6 zMcbTuD#EiDSg&v+Kx)2Cc8>xi*4%G{i7yX7!%R0*(!Z%f<9PROczAMg93>s3iR4&K zA!b`jIC5S*-Zna-)b5zhi|$Aqh>(I{k^Liu05h;8N2l`O=GP^yzsK0-xw|X>yz)vKDs)mSJ~O! z6Swk=kX|uW@#r(DC5u;!+M)RH-jD-Q7UYaP{rFbX!QzUhwDx>Vxd#pOIEb&kE5b^@ z-~5Z7tVvZO7^-f`HssyK5iZ!-CYEnlB(PpqOW4YO&12*Y3+-ucP?|?LYulgE(#cTdS$zf$N>)naRdisUdw`ZGo%;=j7FvmFz;&^sT#o1vkjTCyMnES;`H7TBYxRbAh`2Ci4YrQ^awZ+uMo(=+Z(CHhjpnIOjp&ex zFBhqGm|#Ujw>o`7v%`{_^HS;Dld62NH0kjNO)ddO%r zwt-4)(}{r8E%nv9<$i*T4AK*zT`(zmZn$!+Agiuw&Q8#OWl3>!I9Gc)xw5kJkHwVg zX*{o0qGcsN#Dt_WqvQn(=m@RpL4dYHt7(7@2~;;Vz1WqnvsKjUr=|h`_p%bkQEfyn z6pKc$e&W*GT zkQ~_A-*4f&kH<(v9ff@aASd7KJvzbP;-1HxX}|RngBH9?X(e*z=4GUeGF?8ZXSC+# zN{kt5uL%W6x&pBe2%-(uJiC6LnWO)$?1R#=TR8q>0h~>X6i%K#HSn0#bcy5d(O;J+ zg0Gb?vi!*0JW}G`SpTD1Wa9_g96kkkY^Q^cotoI)pEz>Cik`a6o~aJ(UKRO@$3Xml zPxg}oD6U=q`ib}2Mn0}OBi~_Eppa-1i|#XRGZ<5hChNpkoB&L5jG59`hntzblET<0 zpYm@gGc{aNOvP+-wIp1COkGjd~Sj)`*oq>gC>yYFzn5yo4^}Eju+|IiJ*p zQlr+U>4i9-)2ZFZXbTnjhAi4pV#WAC=~%+X;_qO+CRzTk??HG@4gjDdib2EE4{rx< zWeixQxYi?87qTUyJ5!#p(m5UAbuz@!Sv}T8sT~*+MA{CH(1T`*+lSdQ6EXmV_~0uw zzso+st(lIixDuI8zcDoAqa<#T{N&>k5khHaN)f>-)rWJ+ldUt&DySB(NLm_&r4-pg zwR6Qlp`{FXnfOk-?I~tz?|NF~*@#?2lAR%I8h4Mf!3?wY4I-e0xc-NbQi>#xzWMd5 zi&2T-b;OVPu-WG)o@SoAx4)G%=We%N{1OgP{g@4d4wBnkEW(bRu)gtf58~TpsJuz+ zuyC9G4Y#a}4EuKVoafESgp)fLQ&yn_%vozOJ|NGgs=VaG1;xc%{6Qb$3(3PXSNsA3 z5DywEidg%mSO$(s*r{e`W{R5I1urwL=U! 
ziZFA8h3j5!I4>@;9lcsttRI?48)aT}MuMU9qzyV%-o}S0+-5UN$Mo^^ebm@>lRs(QGwhvd6h&E{Nv|L7MMFj}%ezeFrXx(-=(`0JahgvNw&kVS5 zR7hy`P%>sN!jY9T`hbk4RLce9=kt$8vK@~WOU#_PD#bcA0hQb0;N-0xbtZc^k-P_3 zJvf3MVs-rK!W!UHa-DxCp3p4hribDls^)n7&5AbTd=K82ZEXtU5Ptt5VMjDXfhLHS zVmOVH2Em|iKQ_G(gG=3pt(^6QYPIGgTeq2;IU9-X#nZtJtOZ=EQL*a3BX>C^PiRc2 z_K8z#Cyy!iCOs*c{La+%^ zw#(1VYcy!{zm9fy-{VRmW_5~dE&SOR3_$0NkPcP1DZu@DH z0uC?|3ga$L5c|@@9VMuBECU8<{O^mD4PBIdo;Ur5A1N>IR)4M*?QF-7p{8C&rwLus znZ+F*r)a5S{ZWkOw{NkQJlq$LZ^N^~<;KA@8y0^-hl~_KEFestF!(Kq?PO=lBUfaa zEhh3L)f%1*FU*nU0e<4U?{LrcSi7XkN|O#bvFnW)b0puzh#l3Tvp=T12n=RgrEabz zCl3b?8m;zi>{2lr{bnMtvBuoWk`i>1RycN{rmZ`H!VpBOPkXYj9$mautxh(23FH=G z+p4GFHjiU`BL)FT_WDSVvyLnOAzk|C>=(}dxd6+I3vVLLGH;nI2il;1+c8-KI}tz? znb|hG;5#AB&3EAHVgl{%S1)EFn2zo9Q&#pj`b|?UeE(&%8#vh)eU|TyfTAIuY`qvg zHGgzU{)4Qh^;R^$bBI0>zG-Y!5-X|2)6o8cCL1dOts z`ba5GWZm$azVXac6EbL$qUfIuN-{J;wsKueIix)!pGe*N_g}ksTD-n2sB+Hlmw~54 z0Hgv`8X|F2Dh#p@Z8&2YwSV1VyFf%k%lgLt`(^5&MFQe_|5C}kBF0AYp~7{HdE4$J zH-9SIY?H|NGpE{Vvs`Z6GtV{#GxX;#jn5hQIH9eooZ!Peiaq_LlZ7`wj-3ahKYd%y zGf69Noer!R06UD{kGZGP>HXx%^h5a%Vh%!hwB{avw`QsHOm!{&u4udFRzDd~ac2#a zZ+f?{mmmr?>)JfRj}(%+&Uhq1u6L76Z|9{~n#B4V(LT7t z;_e!2Q@OordN0!|g;AYsPN=LcC@}NI@wiRAxes}z>80^@)SxHvjv8e)|2X+sEHA}Z zv|oWy?hsk-YkUbnLPbjvHwqS_vbhcAbc{RIkh4+>P7GyLZ23z1_XOx*`)#!0M2#=Z0x9vFU+OSB@Ij@BoY2&VR9x++e0V5lI#*tJ@6I+%SEP1Sh0 ztmflEZA-}*?R>bX+pyf97&F^yrE_vGBx!1O0_V?|{PDY24G`%)=*Iv^}GF><5?5?lck4*`CxPmh+63-;A@gRjvJ0 zy)g50XYXUESds`UeHk=X6W%vYooZ%^9Wh`x}lOE%MCfzCdy0&FF> zz97qXg7ioF#7VY(8&uqug*|55d|2|n23r&c+kE8$VBU`p*Tb$1btN+9@cv>{_oT)v z+ekF*b9fHAe*Sv#Hoh!qB4H?w(O@e44bKOK?QM%EZkvBHl=e9vP{Ka$s39i82Jr`@ z6EL+O;|9<8d$-@kMP8BNHL^G4-VXaGbjl0vS##%!${{`c`paZPKXKA(Fz>38|MGFS zr(pq#%Y4TRQVo*M+3+5%3glPxT~^vZ+h0fQ6xs|}_E*}se}Bw=-+9-8lDbT$WADkH z)Y$+KcS?SApM2C9ib}Bfhke7&8q+Y0Pl;w!;hVNQ;eJmNLm$$*Y!5V-8zB0Wav4zG zstQfj1Todzod6N{P46pJ%Q`(VoOJ%ZvXW6QGE`b@rbb;C(F(fRmeMA*m%=FE8<`}y zc^r*1NRg@%@$B!T2H2VpB-SAT%zAuQV9R`4>t6Y*(wZ->KQGNDcKt@N$SA0#PY`!~ 
zcoVjiYX!|+b;61IfY!dr#eUxQ9qh`}W0%U%m(?c(=;zTA0-Q5#-nf+ocnQ{npRasq4)CF8cejb&1<} zpSKjdT2LLyM9z?E+4%&SVTI2(W;!$VtL&Cq1U*v$0$5C#v3I@k+&XZ2G_`j9H@}(v z$+K#Gq(l&vZ8*5_guyod8o<=8r%X;dP1&2$a3vu>PM z@~_Y5>ni?10`l1ZOFMY#fx4aFRC9o$(^N8G+tSAG)&&q71OS83eU*Tc&J|!8dN_7` zP}ziRF=&80cxinfyCSTNAoS?^Ca&($6H!`fm>l8t!v$;}by4^*hX(6^IAmY*jo8;# zJOr3jc52G@oaAxahMmhMzy@q0TqmPM$6(T3Z)I@QleAtO0^mFj?m_No3@p_KFoFT1 zX8jcrCb9&Bdt;}v6`P4TJ05jdM z(){ldWJgRdK%G-p``;6W|EJ}~M}T1AG{HL1imMP7`Xq|cg1d*#ia5~8XdVzRH%QO3 zy-sg!Av)+wx$$LDcGK_MmEPF

M?&0yYCGh5#G90YJoy#PHYLW>Z1rUZ*PaA-3Dj z-8H{$XkS)Ysy^!heno&=f9OQj2VMC71cq~Cd{ufNXy54pBJG94;t~?L{o0jqlK(MtDe3&tm0mK3U7{jmVJXc-P}XIe|MFLOU?DsiYplY`UhGH zcg4f{Oht94)e^P?nk9WK*j-t5qvy`Ur5D|aXcFa#B5ls0h3ta{m^IFzLx@~cwI-k# z^=`HE0l4s-c+!7%o&#@bn(zI7nT|;tCWY)QlZxB5z)soAcrVCl&^B;&SGh`dao%2d z9}>E5!ruD31LodPV$)W3T}3fAV_USkus- zH6A(={h%~${HqeUs($LRBG5MniP8vR@~i37W27VNBjbb%#ifBke#N_XY;HI)Ze@I5 zRRI?{z7L@*Y>+V09@t(;=@_EhDVqbZc?KKLQJ|gWPKmUOM2hu8M;jDLL}vj@@ex;O z-1*Sa3N;QZ8rK%5%0LWb8UM9ze4pt*yB|+P;BeM_%yMw5N;P7w_AKnB1o=ggHX%^$YSby$mav6n2V!r^Jq+IOX>OaUy3t?R z)*v~I=|{Ugi+U3Bp31=#Srlj2!${z3tS*se18($K1G!J9& zUfWnm1)?A#5KM{Ow(;N@2E68ZTL;HhyRhLk^k#~^v#g9X)}s_FZf_g>!G(jyk;ZZCI?T3zgG)8`Aw0STuVMe2 z9=^$8h?9zsxe{441V$Ka4VLnO-t8}OA}RLU0V(Xfuiw82=&F|DRg(5VyiP0HR<147 zVA?YcLay!Nm{}yAg}nFMaYeR^899=*5vK8?!1zkvv;z z5ofv`4otsBibt{;cTU5zc$w%xtCE_)sc@k@*T@LL?ut>1rt`1?aLWjJ|rQ||VYQAht0 z^{Uk5{)xqfo7&?g>dfGF36i67YBIZJox5^^WEc?icN5M6VAmnMEip^9UlEEm;0SF? z7seD_)MB~S(hXLpXtl3fAY8t6fn|;tKosKxjL}3)Z>#uPZ?yu@BnasUU|GhULu?P$ z4;jkuJ`lUawqtPbE@{xP<#5h*1o%p+<&!_ae+(>O)JXsnYe+7wdaO!V#>l7d_&6U< zm8#gWb1V2^Z{k)uVVc1=xr^tsA&?~tA%{+2jv`N39Cuu~XxC8~@;Mw^BnVyR%9W+v zZo#vf%=Y?A>$7Tr0|xITW^tw^Ay!OP3X1XEEgxbF4EIPIo$HgY@M-xi!+-YJ;rEwd z{#rikXkdjar9nlD@(y0wa=Q?vG}VD}%vwDJu|n%kbIYsd90Jkv_<6%(cYb0MUdu*( zUT^VJiax~yUtFyeaD#ubx96XE%yYtbr6`+ubET-=dL)^o50w{Z^%V!MhSe4^W#o5-+000DPXnbUVulJzIdq__d{EfBf#oJ|l z*hlS&S0{RH?IH?z*{P6i&A~5>`_EBN$r0_3r^GJgL`mBD%?z2i`r6f9UyPYbT<=}R z(T3fhh`0_|f}x7qY?GX*-n=@pzbm3+M>2a5 zrMr3^G~E={2*o?oKrF=(O4~wL5asw3EHD%+x!7dI+@}6J2!7wnd6r7?xk-v2g^MZdzGJ;spFMAYzIq% z`&wQWz3DG@7;UM&l|S6&e}x#1eG{rbr>jN|4=>HjD z#(J0ut7c`4zRiJp1|-&STvMFp?zRZI$NJ;roBxHPWl3Y7McK&UL0i%7$psgrjVaB) zx2|;W$vbOx9#=$m_ey!~8=v!GWj-C$6inEm_^! 
zxKBS)vc1=Ry;*&JI<;b}x>j6!Ja4_R_zF5i99)Y>qMI&Gy*Qs;U{!o`p|_)OiQo|a z6_4rp4C1bx^)D`rHD&;BV1GOucYMf|ic&4O9F9yB0g@ z%}RVmHEZgmtN&;AVF7{p7}WUi?kzZP+yH83>DlRg|F9 zPHk`pgOGKcsv7NkJO>t$c>Mln;RjIb4##EilI1KHmV`l^C~%3F}@WIg<=3$b zLzjfG3%SR;|M0fj$lm;W=ytiEGkp{DeKMiN6=XjGlxu*pT^Euz#Q<{a-u& z7xa~(=Erx9ULmvTjrm9Z%G$i&R$L{L|9L8`?#17md(mToZ3m#R)Q5^id@Rpl4#gxa zmu|ix+$BQx*X^nEYp@1wl`>?B6=;Ow?E|0P(XdKrAvvQH;{9>sZ#65#sXx+K14B-= zMS@&>$M*9l?UnO3`iHD~=|S&8N}SpP=e?c1WPp8In9Dje`G1CD-b-$IW89-@rp`Ta zwgC$CT~&GhRdy&F-^;v4&&L~iUizPLnDqyNzBZ>DJioSRY#~dgr$Nt&oz2ip{aQ=| z*vf$L$Cv(v_MGCw&7Y>dK9<6e^dtBP~E#9fm%Pxko@#`pd z&lX-Z3bjspqD+=PZF8kcSkLF3tb58}g3FiJgrA-nU>NL%br0c5(}aH9vFpRO#fV*! z190qo2$Zwla_#G|H>u*PS~QSZ@;pPZne(?qUsE>R_!qB;s>%T`nz|(l-Mw>UrR^S< zJRJ1unmA#wC>PO(E!!nWhpe@1d=p&jmF~%EtgHW4y#cn2jb&j76?H5?IwrJtF~Hv5Z@CJB9La z_fWj!$JslkQ!%s{K@0hL-YxTrNsNP(^%!$as-*ga&vDXWjE^ z13a&cRFzL2ezy?f^S1qd#5|xAT=7?M zs*h>a*TF(A*60~AOkfYj;9w&a13&|zmK9wXbaNzlHQ2es!qRhnWo``1Izj82>&Hlv zHq7hRdPD@1Gv2O9MeB7HR1O z%oGrIuwGtT(pY}ASEl-f+37>Vnb+^$y}Mud=+T5h*AaI-J!EYvd&Ck;ncBa~H&`xX zI#4vUVQhygExeS_s$@tg2haU}r)0vo2c7Eyy zJ7XyAS9|*(B(CJW_y}x9wKbEH`B!qxvZSnE{2i679uQORHJqTAc_zQ2Vl~|)r~CW5 zk(Lxq@>T&-P!Nq*dnR|?#bt8RB&QT7yfUF@f|1GgmwOJ1E>5(MT)B}FGc_|qUc1G6 z{N{%On=1dM)m7#c`%ZcH$cVPPi;K%RuGzLE8IIw zZC~?rC$BdNIdhg)QslO3w4cw>sX6873ATPcm&LD~j$Ut984V*+TKX&fW@5y;CP37sXQWCjIHxH9#Gz~;e><0*>^~ri zoOgE3lqh)dX*NAQU0Y>yscQ=Yl~Pyq^6gg@3rVewlspL~BmK?IXmeYt8+4KkcH@XZ z&&BE7!Vr&46ZTX~ynO1)6LC#fA|(eBr~`{_>V-12kN94r3-xj@KHZn7)Xy|A=M52J48=yD!fl zH$YS7>9sP;Df96ewuRk_7!)ZC z)<&@}20S;M8>+jM#ixYPjWt|0PY`4zr7(obMlS>Z;?V2rbeuBA59txtSY4?Ejhc1e zDHBwdigBqI$__3cfpur~m$TU_x&BqBJ-L8EV-nI0yQBELD>AR7a>e zGAGRGjL^w)DXgktH{11P=@JZ$XlGKu@ zUdViLLZr@H(p{7$r7Ss^ZF1zauz!7qvXrXNLT?cyYWL-Kkh)H`gU0~JGwvik+XEoL zSVjP4<0)oS&P|>;&l@Ti7=ZTS`ekUyd5^%n0RMwA9C z&!UEPYabuqAz_D3i(uwJOVEu#NT%K~C0X;}H^IxZ1l(^ZHdkhab zu5=d%4-%?9`dzv{3$vQMBn zjNUb@)1nvEwAaNC=UFVD?SSPeEncVYwmB980fWhGNPfs23#;((7;=rXP(~5HHU^^N zcA(ZlYlGsF=enuv*hL@qSZt>C@y)Kp+qXAKOBs>PbwMAI&fz(}jd%YIm-a`Te5LWM 
zuE*zm9|mJgV$(Q5g?MFk+NHA5-DuwM>uNh7cLNuo&f}6!R*ckn-pL=fzC%)Z4CHER zN{GGv?ak+My^^Age-+UPuuNIq5OkhuFOX6D_Z*W7dcb(XLA$TW&K92gCRV{6Vua)O zV{`@fD0#|tLO}RhClFIujjYX#3Tkf}m$-IiiIrqWm>O#;8k3q#QCwf}TxIn_nc$%$ z7naKE5K%rjufEZC7Z8J45Jf?ECO1tg)$fkF0tf#*5%~uCl z-Pu}-c%MzgE>*d9o(up^4bGDBmcsDxFdjA6n(?%#3kiOS{3&*iVyvYKBxO1%BP-FX zi8`@i{PIs8mojHz;y}dPLTy$7$EJ&UGFBfegZFFV)0)sc0>;KR4Iqz;DtDSa+>oR> zqt#-dPrW24#2c!~&gQQZx6{~Kl(Lwqy)3aPUP$)lj7p5S(gJ;dr!MGZ7o8M6a#{`f zm%>FORtN4JtVqp<P|O>^Jh@vc*ynHH74t{rj%cV-kq%S*TNf=@3xwGMV9llZZ%BE}6Y@az9X zp~CNf4+a!)JGErV!H^i@53JJYdJE>5!O_;KKUB07}J zs5;L(#Um~rpYp~?(qZ*YK|z5wR3{{95u@*^wBncoK)<|w^yz3az6mOK%pg6BM+ zlobA8g9lg7wBskW`caeNcbf%|Z~KOh*!nVl6yI1h-&%=!e7m$nP-B7b$g!nb5>K4{ z1de2YpxNT8FFr_~C6oN0X)E7d?_Ze4G4&ylSZCxjqB8Ln&xL1qj;xBRKAHTXK8k3F z_8-~$l%n293J%q|a!{BjZpfDXK6ZpSeNB9|J-#RSD&{;64vcket(G@W5N2ky=Wn-% zjCewLIqmwia!lbb{WzZb3w%nStuuc8g`0m1{eIgjyqxI1Tj$#~FgdnW4#0j{GT-#u zugzYn;Z(NH%r{qPFAc;@q@RE~vgecEdCu;^ha5-l<=@al;}-(@|1D3RSd!dYQ=CYy zo_u=lOo_8j2Wg-;cr`Eh^WV+q$0xrl)c3iLG7lzY8~`#e)@Qhc7imuzF8LrAsh%}? 
z%roDtbNy?c10F`tge*j28M%1>LqfbR=W^XM;!8}1Ww$_~M`Qj^&|UI2LXk++#QxV= z16DK+Nt#}kADz>tXh_iXM>PxT-@H)oS(FIXRO5s?R0xynKhE$LyI#XuW^sD3a*I9P zE_K1Do;PKgK8Q43@5|=#{bMbBOybTH%x~j5vOJ+N#7pupVocv2K!;MfBgQ5Cj;@B7 zuGV}!dDBnmij>ej1s;PdU0KG1fi2(a3JAm27TODfh5b2RziJNczb^iyQf{<-VgDZA zB$y7T@pdTCSPDO6JRjK55jqDEYx+vl2+~&S+Vvnp&eiPe&5G^@X``~U-=!Ux(<}R7 z`4AR$y!pOIBBXSLZ`C&7eYGkv(9XYUu_o(Z^*3`zuF@$~SO@x^E} zlr7F?e{T57-uU>{nqoligPku_Il00VS)$IgYhQagOS%os6UVc><782FCXANf{((K4 zhkRZgds4F34>Wv^t;-^C(C(0mV7?9w9|JVWZh!LEyX@OX+_ESew7|zi5(N;R58w(}CB9w%ucqEuPhAZl&J&HTXyV((wDE z5ko8#1%WS>vC2qI!d@(Bs>eUmzU^YKV7mnv7QY?B8|jt3!Qg+;$~g|EXnVJg{1|R4 zfl-y?yMYXLPJ*T%}4|Jb9m{nMQiTWw6xs$ zQJutgP`0?5(TTiX+i@amJ{jWS1cO7ElZqYhDP3risSuRS)PnCMJY7^acn=-A>iAvg zeuzq2Z5Ow>lx2)d9sN9fAhpJOu#LmV4N}28CzP@^czDA3dURYX+IPYyJE6hk4`a|7 zBmy@-g2i84^dk-ZP9*uDTaqe!-%)K*#%G*v7JJ+(TU1qs#*YS_q1lXzaHE8wwDIoO zMg#0OuaB?VgV`$uTq#nQ3)Zh<0U`R4maSo3)Bo4K=Wz*0HSqC5^`K;8M_}VvDr2Nklj{@J9}HOfEkjv*?ho-oRr`5P z376&R;y>dfm$Q*myW>6;-o)g%Lj70ma%|1IVi`}l(F^#(ceTAZ_9t0l8&+~8^Tyq@ z{xj($jb(;NBZ?C2_;|;K-+}NJj!FAM@`i-@=4x);jjl$!&neKL{lj~dZQhgrB8!kW zep{6eJfC^S%=Z|NWp+q=_Dd`|U0S%dn7#HZtfuhrn(Zm*A7F@8AsC=C-1sZ@m$sE@ zT#0){G`v*uj0i;%@AEV2gs97<#5-D`Rj7vNS3cam3YZvS_*c(Blw<`HUxOnpGi#d$ z&GJLnHbMZLO}!UD0FSGJfbc$6Vq~=zGO!Ce8 zBsO(zsBpv_@KS2Fn+>dGA#4O%Bf$&TsMthno0&-knFEQjj555mp^rYA?rqG?4!zUy=Ua-dm>bXvZf^6u ze}E_M8(1vXw;)K(>`vA05!_C9%^sTj0FN|qLb@o`@(@|l6#&6_?h%qth+E(KFRcR2 zGw4+3c!!>vZp3pmXp8U?VK@=PVg(vAGT`L)$!R(NEZkp;LSneFta`_lDu7ix*5M|$ z%XZ*=onQS?=e`JnUU7cCBoE#vaL)fELOZl)eEVKJA;k9VweShxJ1nUE{4QlXXQb>_{K0l!%-QzMM zTQ1u-mmTx%3AV0VfJxcfBPi0FYS&S#WmYweppHM#8V2TKaKU*0eMAxL?6B=sN35z( zv>#O7w)2?K;lINmZv4Jw-SQbhfb%md(IEewZ00gR<~$XMSJ3R?Lel5nmj@3_8wSj#; zbI&+5gvo6N50|)&CGjQpwhw5iko(4zXAdumZLNB)^>EHb>$y%9*8=x8VNPgHtwf|p zxyF}pMnROL2vX9g=BKO)_%jas?CKPIX2v)Yv{cYY!@kP&H||&#t5WMBtnJM2!D@7UQsA z-PPgx4VS{Y`Iq;U{0nW1T%FKkq`ww~CYIWc9M+GhHMBXmdoYV{PMtiG+$gO!_Od81 z8W%7Uf)HQ=2X?;#|4wZQdFPqYw)`QM_ugp-!`;BtwaGT!WFMN#Nh-obOJ@h>hU5xL 
zstU!~$+$t(BX8JCzYI5-KCDGY9ZlzMn$FeUn~o51yC#i&qUE-1!FjSRDq&jl!_I8Z zy1yF(5LciOy!6Z=k0)O>JIj&8YJ-8Jrk#}kS&$@6)e>-%kAxR}NJ_o+LiCPVHGaph?7&L3_L+&!3m zxp=ikhe!5u56n{-pD9jdH}WqDo>r0-#|L=MD&O_!3N8)Lk+!DpA4>c}C+y~!G3P!A^D{WwC45S$AhKBD*q*7C`d~8zdmJ~ka51hHSE$c=gd_YRxUfBl(jJ*-u4S=E z_~YEA#@bZqwW@dZZR`@B(pS;Vg_fT&3(e_&EVQ?pJ1aaa@Z157GB}UVDcm5TRi;v3 z4;1F0iUll74>XSRzA&qVW)M+toENu<#6*>s}Eo6$p_OFuix8)Z81(lJfD(;dxss zE!7!lwKz=>q?ewYX`}Mo1u8j?*S{7k0Z7GY!j0gIe;xYIuH@+tIOKrPf`WV$<28O> zTJ0P1*k1!YhY9KwYrCrClky-+if{jymYA$x{)OV|Ndz(%&lA@sN@{BPd7cMMKOj#( z`2N~|JqyEm#r~G7z)@u&))((^6>?@TV{m%=jaf<8#`BEJ-Z)=>#%AUUYg^BjoTg<7 zjA8xsUV`g99G2-qH%MqWo>Rt6H^N`uP1%+Ws>st00jA@x-tXH0|0dYeP>KWonTlV| zimUwqX4Wxt--k+nC*hoN)RM^`!v3m4gMPs0U~b<+vsrtqNJ40%WgJ5nFJN5nunX7o z-{uN2_5_w|IBWPn`#JP>u-t!rD=WHn(4{QP5Axcy-v+!hiqe#vZBdJ|!r+tn>%olr z?_1*Sgv6ENOAtkxI~r7SC>?8T9ZY8vwD`sz6S<@^C<}jyzM3rHiQ}s#83Km=}3YJeQ|M1BNN^*_eZ~7<&Mg zu7R%jcCxEe`)~yWOLRse=BNxSJ88AArDUB`=~2xoe?j1&rUcpi>O+L^plOc6j8w8( zmT_vzJTiX-kKVe)nH{2*KtM~EnMZ$OruS*YpvKcGz%dO;n>-v(*VHTtgsJXe@_v&{ zx^d$NC}_gMSwS+&)sDJg<}TvuU^c*A28)Sg_AM!) 
zO5XLpwvd*e8a<^Ttt(}^<%kKOht?pbTR3kRv9b$vb0Lt+Ome)2?sjjyH}6lrFn3dj zQf7JiwC^t{Q79-x|M;l|sygHwo83lJaBu7Wz>pLM+Z8)uvFSJwscr9(hv#qiF<`re z3U(r;ItQE%@^w>`kYzbU?3X2`#xojl{NVv7?R`j_gLEYEysoenl&Xxr^jifYt?vYf zm)OSw-~4pGG47z&7G2k9w=HWt+i|6~Neg9+yI)~A$C{I^BQ-vTCasXg4?<+{bLWWN zqg$M5x~ELg=z)@vSN+^r-{c);ljzps>&Fg^ewn~PEz-*oh}@dI7`rFI>S}&A(yV(7 zIctOx8-Ljf3LC{vICZEs8wX{Lc#4UqfVAdQI z60O@56eDVp;CSu<*EykVi$gX1I`W>uHCF%#aXtW}V>Ltx5KmOvUvJCy_VgW7Rvvjj z%)!^DPPe~U?cttUf82ApEZ^OIt6?g$pq5oDKXKBS2eGg2Q7&TFas2~3bu($)9$A6a zzJ~UO1-XdBZ~J`3RkM`*SMTCdg^B-#{Mps^`I6mdkbbA4m@kb!X;08dd9{i!09}!uMi_{v+lW!wW z{RsA@z1~wqIOHkj5hZ7UU-<)mKjEWgo!duCxTGGZkUxoCTbctT&L*web_17`}Psw_-|beaA^SJ8rj>U?}}>B%lR;M?qs1-7yZQTAJ}ihl>g z!_t1(-sZxSvGSN&v*w+EMQEFMYt_i+d>^3hYf?40HWuqNT82zQ77hRAnftJiQ~R<; zLa&MYzC0%*`i6Fae~T=NryW|ONEU`an4V93#P|zF-HPE%$ow(btwG^);QjFHJ6c>L zInizOrGTauT>qSR`vvmJ+c6>dRam4vh*)q;aujZI*b{|rXFPRKD@u?ytxlAB{pVM^ zN*lItfY52d#q!IqX5j+F*m@`-oG876e^Il_!8%sBfqm$mo4TpgJ8I3_nd8bWn^l5q z)jk#Ng^;5#+Z*haD?B1_-U-fpcZT~4E>DsetCS!qeCC&(zMiq9ju+@_U}3vkdjoFd zuv42H-l!maY#Rv)X~)=P)`q8y_SkuTUuT5g$xVcVlRW5h1O|}`^3_1kjklU)pNr^3 zPs8gFbsB#|(l_wP3ns&VK;WOpKj0S>_G~~rgAE4%heNE;CMRYY!m=2?Y4o~3DuK=# z_`qrxX?(_E=)m?_@9-WPAS0!O1?(t2%2*Ikf`a6Uj9NC;!Wn6Iwqp#TcxN)|;ND0u zX2qK+4{^gHVA>4dDf9jXEi)W%)ni4O?Ugxw;Nf17FiT*xm?1HWRUwa)rW2scxw{NA-r+t5DWV6zZfWN*e%q{I4k%6xB88kn_onOX-|hDO zM*H0^o`x)Ny`dANYp)3LTRmj_4w`qWAr*t{*Lg%c)Nk~A3~7=DB+bR+GE>mI| zhG1b}PXvP@U{w~hfKi$3HL(ssajHLjd?|h9+Pt=CWVJ;|v&wdhS-Jqi%HBA@da@*L4cGX+Sr!UdwhULHp-ShR8(dx|@n7qLRP(;{S9vWidqrv3k> z%$MH6WX;V+9W1e4syYvJZ#Rv2MQIjUqg9Xn$T{urwy?wz(&O(eWtDr3bA<@Ed6i!q zQVRBIi;;ERsIiM*ZDwbjI+@E|$}+gGnJ(r#u=-9zu-MB0d2B#nOqex?lj1%pVEf+yJT1n!A;|zwx&r3{S?cNOh9| z>B368+oryL!Z=&qq_gMHpTtbke7v}IMfLKuMSYCxD~t%*@c#y<4Ps9@k4G17!K8ee zJh&78M9zylDJhcjqVJB{tOrA+{1o{=Nm|5#r!P(qp(QHTN7yQHv?*kc$5P${dIC5e z#qU915g5S#GW=|yeH`Q}oyW=W#%{L`Fpr~r#oFCm{*ovu3_VM(5#~Z%QNs+zvAU;X zQ0QZS3cA_nxLwWab_(yhn@KaFSM@b;A6vN$j0vzU?;G6*n&TfDNU&XJd$J@OIf_Sf 
zdMkN#H(3#;UQ3TqUOeimf!yM5@1^0O2;1juj5Ne=&@e4|&mZh8S@ey4mr~sS#N6+W zq6k3C=NshXH9;F#NRk&K1XSIF_y;82=xloE_Ru!83Jy1>6K8&kMO6YI!}LJQp8b5b zw8J=>-sy8g&oI9&W?)OvoU_z7&4@)bRN*`W4+Z)%?RX-WvP*YdQc!E3b0CYNqm={Em^?Y@X2{BvpCVKgVnQk-AmTdh4CLfBm{O;`XrIA&RoM&p4w&?7#!uQf#lC-4ZLPI!s|E`km*)HcR%7=N zt>j)H+O7rD{)xR%&%`T)UYy`&O(A@I)T2>2N;^izxv%KCN0F)?;lxf}iGke7L*fNG z>47d578cScBQ%foO?Xi=(`o+5pjWq&H0~C)E0fdQZz}cZhf?p^zK=UZ;{V0^etT?H zHCs7E&#f_{h+Qf-m(WYHw<{!&u4gy4-0e1VX2hdXSnhP=UtJ#p#?zMkXZ0Sl zi{e_?4*%rB86qVcTX;pGPVUkVn_2yOU(u~OSgz0sd5uCt68R~VZTmt>r0Fx$980O? z6|WFPGnIfn5uqviTiCpY^4GiEl;@8nKA=iuym#NNJV;c_6KGtQwryT!v1F^KbRH|r zPDf(N4)kmQi!kbti=BGOuqigJPFJh7-9z)w*>Ab}F-ke3%+B0XWF@Lg;;)VM-)+M6 z3Qj}q#j066TU_+j?_5%`UHOX@esB_L`)~q0j04<3KJSV;C03&Zh>C8FoHY4Tb6VBL zQO@YUQ7VTdtn5*BgOuT%SK>Htz@Nf~%S_QLQ64^q6Lk8Bim$qVH_t<4C-I=doecq8 z>+#LDJ#Z$v`umwyaWc*a96DGf`VvtaDOWPZ^yWLBL;bs1rork>LT#=QNkvj#OFBX` z)eaCb^dN`I4uI)B`khNHbmhjLR*j#{J0XlKHXZ!JvFv*14gdPu3CPkQrziSRcZVx= z4-Z>ux?2#|#${{eR#8kWjfm8zO=Cg^CnKZ@=H{M}wm=6DV!W|LRQLftzN-NKdW`wl z9VuTEfme<`Y5{bv0ix;oT{*>yjSse$J_D0H_zF|*j{WozlxeB@n^f4E%zZ58)=v7U z&|cj=GZeD3#$*_KzZHQbpf0N|3J{w@rZ5khjp7cz>{Kgy3h{N zN_a(=T|f%o$oK4MxFQ0P{?rQ-_9bWzSO>OgqAdx5V*6NUXn#ir5;LoO7v!lPW@WcO zOW?Ac*UtJo5K1A%anN;Rh*87%jNOo0UYnC_-x%RkS?ytQLLY@ zPz>#8K1*k<5V=k$<#GuGWIJd$ zIw!m`xU2!Yl1w~7b4dK})Q?;O*oaB7CgQOlB$G$_>`5ITMp>WVnb?$n4hb_Dk$*mCLO1%3cveSnh*(?Z3J zd?acd;afL*qr=Z#6^2cP*QX%6w&%G3BihXK7kmRWmY)+@Da5J>U3W4j+>I1LwjMV+ z+nC2$`V7M5L*TV?zqnHxr8he2E`pT=2wRi8qzMO4HNRwHeWyoC(Dnpf##YU@(#*m0^PgH)Lg(e$`Vx7+=(ZwThmlmToK&fS%y zwgu6?tw7;^9Ahr?w&w&+reqVvAET;qt0*IYg)$*0^zPZ2CQ*9P+V^ANHcs1ZQyYWR zJuq+*m#gD`nSk8V(T3T|vwJh*m%cS)t+WcP2pUB5E8jVhbroGV;mJ^@^rNsX?a;lp zz#kei%b`iCNTf)wLgV`Wil-B`Xp6N}0HICIL>F)UpV$tC>Wcns3mLlMs_ec5R6-uh zS0DQaQ65Di^&a}fgE6L0+LDg>Ql%_Y;wkyqK(-QF_bXfHuos$!?s;F=Z+*9Y zKs*|i3YRNEoTnS&qSi-D5XH9*TDUM>XiAov!`2ir#J0ytRn~(^N-JeIt7As_*u_Q} z0bTN8HPF7M^BRU2!Cuqz>tdQ&V$R}mOjkzg3bB}RDpD1E)ye<=b;ipHJ zX|f&V|NL$Z4b~{}ubfQ@M~PL(8z1@xm`uJK<%he2_hwg0Cs)T?dZOfnf#=fKnQ#By 
zMpwuAWvCfZQMxfD2jSw7dIcy|t*@A4hwIlppsDIWBn5vp08MDe?-*aS*`RH#f9vs) zoND_J43<*4(D2eirky2yOrYD98t1W+WEJ3B?j&#>lRUeaRua8rZ$J!q_Ru7Gs8f3N zW8ywstK)SZ>l^X@TkBS_QM+B{wCCS;X$I)>RUtNR9jN|cuff)(*;9mL*OZ3!eK*1~ z1e@+sUO_}S;7!m*Sp7Di*+FW)5@Ax57i&Io+71i*$%BbaySy`QRK&9&<@9QKGlMYD z_g3tZ>npw`fP|~C<=9fM-6@WNa)!kb8R4LAa70$qQc*eS^J{^w<$W`Q1|37Q$_@NPx9gKB5Mim_GyDB`+4v_+w< zFDOngVfhu{gKbmQ&>whn!=o7{kaFup^2?4I70xSAk?=@g1%b$P-MEa*w_+|`fGgnr z^W|~W{{w#jY((nROibSwY$4icIHOCqhpn9hcex{bOwO5ablv&u67IrZ<2 z&m%HVOOM4QQL1s?_BYult!E&o$tA&yoS-u7g>r2ydgs~|cdC~Ux8NjPZGX?`jO&Z# zN&z^L%2-|wK8|+}Fdu zLbu(SR(spulTRFId5KBhuRAcaejZZsD99341vd}PkN2V~KYo0A(`2Y+v|?2>=f}=H zLG67Q$%8X-0BXY&UU2~V*{6dLl=RKY95X&9g1Qma=PR>8xS{5lvdOun98xLI_A&HC z!`5#hDV}!D7&Rxhbl@t~Uk<0Z4RPf7G#_U`V_w6SU0XvQorON-+?4Vt1 zX}8=?FqaN-16PM)qa{wc{UbY<1Wgsls7XuVO_Yov#<|q`J?Nm-2C*t=`=$j!2d=GT zc6+30$DB@ck7o+i_>4dD;?@H%uiRLDzu!q4-yE#(ok5nger6=iFTb*&V(!`HeOtVu zDi!ETQhnE-780|aZGT7XtzR8yD5PYbz}xP>dEpSoiClPvnGoo>(W%LJeAc_ZMwU0we^d zVf~Mliiyr4Fm_*xmBvFeCXSX6OtB+_R!+aI@)eronLbr;q{W*)t9gk7tg_$ zfP^KgY>~Pa9|1@Ol3L)VeX#pfP$ky&zyN5w^sd%DwI%mZV%iff3$~)`|Ch4|!HiI4JdqC86oAK`d0QUSXd#6P(N$xt;9?`&_ozM=%KiCn235iXs=W zj~~1ob^(AoiZRXwH@{}Y0aHqrGoJyyUP@Ux%G9^*Fs@a{9rq&`k2(JSHN>)q5*>*tQOoPm0A=+Pz~98V`HC$@b#U z@;@=RWs)zHCn7J*|2O+;j+BA9=Cm|?ec1kY$}C(>`^=%b3`43QusCbQfmn)Ka6ge) z1uf=r-U8Qaw-Lo0Z%V;?>e*%Ib<213QJh*LMdZ8h68L}exzc04b0Z~Z6`-cQc(l3J z+3Pr-Yw<3&O>chW_tC@z$R-GSN?9^`fKJ%@i!W_RBln#9oLXm6L_cc9sZ>w*ok+J> zlvFs@yCD9c`tyK$ul!q#K=Nf=xX@S6=N)@f2A zy|mdo-Pd|Axc|~nG(|j9{7Zoc;bzV*R9C8d>(NQrh;y5skMoI^tM;LKEXg^kb@*Z? 
z?H~C^@6yZ^H&?oXB!zw9Fm+{M^$xrXvOA;jtJE?2Lb~SR6PQE$FM<|4D@lMz`54YJ z_QvW+^oAI_P1;d5?k!x!AKeA5%MxYpFzpFKy!(Giexqdfb?N@*@1md9cn|vOonyx4 z9V{4?@z@>|NKMV%vMo8{fcQx(LB?JLA_usFre7H*PbgJ9H}3ki3B6P97G5za493`8 z0g<(~z3$Ka!;KqUsN{w|_7~lQo;-uq@d>hhur1J|Hick*2`%_!5Bb+xXnuflrp5bm z;cDFK^>1779e(PB3ERnOQA;A_De!f7Z$gB4E8F9OU_2Am8@#ke6StZ=zB;}HWj1}b zoOidQrz^?D?>Fmg>V#hChd;I?Ulb(-+`5e_+`GF<@1YXYT|~J21mM1M{dB%z5=QNC ztpeAZ6^L*Yg}U>sT+fT(pFk%&Cay;IZ5gky?|DLai6T283Dcf=KpZ*gy8F0%%f-i+ zw}X_vYa}Wx=l3^LOl!V{=f;-$w4n8_jDzf_t$Pj@!HoGTYp@=KMSvFhVa8#i%`KxK z&kTXYehW_?yl#myR55g8P0AzO3JJ{zqyA*?i)tmR(cc=-SDl8xrv1u*6jqCtaAa$^ z$8CazMp@eg(aHWikG0c}_3kmzH~@>iV4i?pZtsma$_%zv$KCdS?^X0e103;K-o?em zc!L}n(}^AUV{pMrkM-XC$1P899(pidobSW>lx}h$0f(g`lL(R7vbohz77N-sZdL6> z&X^huJ+Jg693?nOzYa7{YP%z6`}Zi55Kn$|NWjU0V53n!_r-Sm{%6L=VopC|(OkX% z{o{@vp%-}Qg~H&yFM`%7FXS-Cf^)mpch6|fZtyOhKy4URJGRQ-eh~SoeqxR#z)g#z zuyD3mxm%((yjT+g1*vy?&(WT_N*s`V8_khvLHZ_wi~LR?f$6es6FDmDbsSCp(i5v_ zoa7AfA<0nSLkUlKQp}SmAB=vvyPYTG_L?kFu^lFytWa9(QGOo|}k=4cdB;2RH01X87r#73%>HGP`%-%fIn02^5K&EjwPU2SJ zK|F4DI=H`-m_7WVED-g0gpIPjlX&sCvtCBStmgH{^KSobdY(8kX|g$){4!R)>r0C z*5Z7uiYGso28{t=uM_QgoJ3qukI07SMzC_wWRl^@PlvWo{vH~XENgm)x38nQt5uIL z7a7YGkPQCz6|hrM!P`nm7NKQK(}Dj1Cme>x#oz1WaicyYwBOqkuY< z!gS`ReuiM{?AJ~=77_yvHyT5hh-%Br;8N0a1JshoZ~x?CYV2oCR;zCMBsQD|&0o`(?CEv*6hUup3StJ-E>r`2rK08npgm7A};J31NMgg^{tQGtK`z zWXeK^5&^SVNY6@EWA9kL7aa5HSIjtqCvP;HtH2A@rzdb|OUTmGY4MbK>_+sBq;J1+ zNIk4R*+|l_l&V2QM-yWxYY1SV7O>7yP|*_Ysq9O(OT0s|3sabnn5eRufFmb$qDxZD z3Yt2d8NT@F@+#OihUb&-avbGjS`%E0PyX7z7dGpe<}OU~9s}Yo>=b`hp#4AJDn$SX z08DI=Mxt3PT!-Wqwv(Wz;%*mD^*M<~{sQ`i)9^~v{=pgKxQV}uhyfnsDv?}dD&Y}i0P+^G*7IGUs3 z?-|n4jh~P5ZQR4gnLg#P4_k$FMu410chI3 z4?Q18$|7>Yq5dAzz6tJyEhK86_>sba**jx~O9WJUY^lplb7Mk!2)uD9@dguw?v=q0 zWl&>F0HnY~MJ_Z3088k|+}H?mRPETob7NW903_J0ouE$Hs7ozP;{+WHe^0&zjMYpb zX)Ew5sCI}1jxBO*lsVO=1{%LzlYP#DHYX7IJI7J}!5WWq z^HM-YVx+dT^fOHOYu2*#hKjG*Of9*aq;FH@%v7-}G{5+peJ+ynK+eU4;97F}*REdS z#`A9uG7GV_PS<#nePO?HA8x@nn8i~PaLVxjI0?R@OZGr`{0=tsfK*J5)_e(eSQi{Y 
zrBJEE-U6_DUIRc#Y~Pn-W}*+kUI_5I&F{d`J&|6**LNQ_Yd(bss+vHNd+MD~Jc`u0 z6nb0q$H{Sng9)Do+xMwey;9+Lqae&=LXT& zZBo_!$@X$Y&q(M15*PO-ULHD+zPD#VCkxYcH*mP{pWlKo^%`FPgiG>OB@ig3o0@Ms zkFNakTZFmP@0Qe@sO-f6p6T*!zS56JyvA>-Z#Rp{nqBi5HiCrt*tGSH`KEg>_(W3O z=Mf)^+Cls2hemm(Z-KRg-m68ow%If;v_Gn}Q?77nT-0n^rJ2#Hf+}BD z847A2xgd2JY5lX*dh#$1db(OU-Td8tB>GyI|DncG zv;Du=XTjyI+%RK@!uGOv9M8tELf2=1&j$+;EKYD2-0%f$|02);APugR1Pyi3aDYd7 zGq^wN6Y7e^9IwCTuWUn>UQ(QVwVoYs;ThS&q7L!<>1}*&@IkZ-`fs)QcRPQFAhL)1 ztgz0xmgObtr@_SKn$%XLyIQOH5V9>c?B`T(zKMS1|H;*~vr{(znZG`wcaz??uNt*A zr}(c9>*p90x{PKGksZ^Q!pF~6R8;g;J6V^#J3b6`7ox$c+uj5sK@!F8Cz6%A^u$r7 zz~+BP*%6dUPY>cu1cei=dSVYe?uAI>;b4Yj_(wtf!!KdjB%%5;2wXM2nth8Mc4m7$ z@JCqlL({f{`4rj8Rv_Z)PHuTW3slFxLl+hw`|p(yqJyy9TJnmPR?3=~P|zLUIW?c$ zDIG%k9MV+)DBMU`*1?HWEK>UJr=RqulL}e!yB`QCB2!l_MRByUsb^DQQF@M8K9Cqj zpzJebJwjRb)A0*ay7N`n!jw`XfZ3JfmiX$+LUFAwZam55R6Cwo%fC1=voD_Z`QDp2 zW@5tjAf~akA1kRWmLUQeGF7!xrJ5 z1=2)ti@(6yK|TQc8==!OTvqVWcAj422*mISabwHx{Bvq2VI~r6i2l;=I($*>>b)_x z0OHd<^&uEnL#7_OlrGx1aQ_tT1Gd%yXtDwm07K3?01UiduQnard<9lE-DpJGrwaV< zV3-G^ewt+B?iWUV9@l}gt` zCUq&5Y~MTUpiZ#kvKau&z4+YDE(OI#?>BrtwxnQ;_+=F&D+%z3a@=N&ls^$o@3y%T z1C$!ij}m~dBgr_nUGaU*m`6~cqAKWEGlo&TLGNb=dCB+9>7E1lE%_fA3Dq&CPtK-aw;fVN5r31x3hf`gbvG+gWLO^GQ#_I2E_K&c>iTvFYRpTN4 zl74SWb5BNj$zweK)f}_|X`PFN?XQn^#rkHD;VONuQc)qk$RnS}M`l0`0u(TDPr1WF-jL7T-%V z2t6KrLJ`_t%D}GN5mh^1dA0DmnqXo@#U$UJ)<>z11#Z>)7l}6psiC#CwQhlkU{E`A z>=d~r@|M46Z-v43?Ea-_TJA$P-&;P$;f$GNm32$n0;3?Nr8de~!YuBCYHFF(p7(u* z$Yf+sQ3ukt(UN0UoH_ZcYMd|g57!!|vqpSXmWn9%N%^L6iL%ch_ix;8teus+lu1J- zm@ex^<=Nd^lGVC4u8eXj&Sr=L?q+NMhiEb`NKV;pBzz`{0>%^MfP8j)zqLr6wbqCn z*rw`6(*+-|5N>`5i#)ku8tldB|Nq)6 z2>rK~*0N^rX&iKW>+^uG!j-TSbs~{)k;RG!F)f}HTLxlq^|xs2T-N8oss1|}fQt~{ z)ViV5@yD?jOjqO_W7s9U)Oln7FGT_>ED( zu`^IowunO0sJ4@lcD8i|o?)xDpWP3iuy)!`STpN5Y`xRv zqPr6`0f@}{eq_HfE?vU|l|&)fE+zo)`Vlm6nlUeS)HR zV*{t_7JEt9APN?VxZx;T(00*%u#8=kC_kjZv)-_-$9(HCF0k>gd(IT19>?x&GJjBX zjK8aud+kgJ3}Fc7H4w7%n9I-zN4YfyPTKu03-qesb9$5-S(`=wc?-o_?wFQYh~1v3sjlA9Sp@VAR0_ 
zbLJG69|jX=77Lscz=aeb4PIMkzWk~&=*rRf_+nIN|8orMii36&a~IupfBHV5fc-N# zKS9%v$O-8eFMg~t#}|GH9I@a>CC+U0?5Uh7K(ve`%SE}3WOZd9y9hTU@^W@2_K>k{c`*Z|&??r^3N5=>2%Gww#)4&KT_{V*T3&Pqg_@ zy@I^&b2Y1ZJ3Cdb{ym@WDu1TXr!Q^f&pi#V_ehG%|Cc~{5}~ya%TR_S{XMUDtCWj& zTOyWB%b?eH6>bhS1xTulI|`XRtH<=PCnv zs!U7AY#6PqT z@6z-y!GTw3SZn^`vrUTy_#fW;<%z}?wU8Ag(_(F<6DuNY@`(!DMCO3TD5t;n>;b@( z7Q(Paf^e;k0Y{7`W3Q`92Vvzq;q80fjCOuBQE%TzA#rGu)94S zO1SmIP~wU3rlMJ!=ulmFN{o+x@$^PGf^B*nifXAaw2wqoG^Bp7sNJ+%;Zy5BQLE_S znP-|mk_q6eIqZ(f$?`O?X91DwazMEmHzJYK0o{pXNi0HBnpfO(k@`INj9Ia%xG{;m z0yz<}`j7;YWkk{wjnEu}pTg<%)zwwaiFl9J?%WD$R*LtK?*vnM>1BAe!^{bdbkiI` z^Bf}|YSCOV)}+MAggz#T_$YawufoMMJa>aAIwYy2RpI4fbv_iE$Cn-dozeojLZF*P^P^XK&Upf32l25QhFgt=FtXh z1b0i4%0}L!){jm=*gnYdv7}c`D9t?pG^MnAJ9!18C<+VXE&aulVmd}2MlP&M`Zj8D z=@w-7oNFnz?|9B6RJjgS`C5s}BU;iHzGD&SLc4Vn<)Lx|S^}U@c@S^dSI9XTALM#8R(J?m^Yb`+&CRwpgA`7loEqj)f@+Xh)sXy8z z_@R1^T6O{jL^v~Omb8+l5Uy~0y<$pVlZi$wWF^aUCceZWRd0~K-GQ8elx3u%98j;% zp!yd6Wu}!{VJ(y(s6K&;<9clOJ`@Iiau?nCGkjl;k>>mgHcDh=4y*qgl&o%!_8uZ( zao3Pa>H97 z^G4=b-GLY9padZHyWo_gAixCKmKy*(0EG~B=gr_ARv8j^y#TSMN=PA^o1%UI7 zIYy`3p{_hcoJymK%WuV;>C^#N1T^F`=04@^P1pwkPV`CDPL}nzF@-fsD`{!SZ9F^1 z)&El?2|+{V4J50pH6*m=VwMCxKR`4+7R41OD&T1h-1eyJ5PJtnjvxNho=;#Y52)C5)!G9^CUTt)&uRj5zol(pboD2%Y z1kOCRqiF9nF9Pq6K10caPKC3&G4eK*&uyG8`?NhpXYhya#nU2?uv7EBFClE=b^Z+h z0mF-r+^(DyF&_Q6!s|Ef}IU>p-kctPnsvj-3EB z{}}R!O+R+UngGrLD^TpeDaeA7CB_bHp;Q6>-}h(cKt$ruE$D=XS?_@?+gQpwT3j(k1=psNJGb4F|S?eYyJO&8Ze{3?3Y;7BEI$qe}E|LX?t;!mT5#}G44=5d^}!xOAxjGbM)sDVz7DxvyGy5EwOTG$i0=Q z!P;E^!AE!J$d>-DfxKn)4dSN zt37&Uex6A?zzAZ!p8tg3is7qK_Yj%x2h3w{3xY4?vaJ{pL}U>@Xl0WJ$^c~;t;0-n z`v8+J+QO!~ZKWtcx~$1JZu_5E7cFI(K)DCu_W9f2FFzkZ)Me6YW?e|(awG5Qsf)_1 zy-D$f4Jp&cGUnlP&BFef;hAm*Hzu10eRVW-di3zns+a zcb#er9K9woESr<=GxE_aH~L$s6_WnhvHIIr<2I0?>^5A9tCX!S_*fu?L#sg&LU&RfY&KE!`rQw}1JIbPWrWLb^Nm8VxCQMVP>f)w zP0`>Fu!aJKm4A(g7MAR4$HE3}X%(k%wt8{Zc6!EbPwxklI(ZqkOIfs!MLG&Dg>SOe z)zZA%?qtXox}G#-YM~$1n5}wLW=XqFwanBEeJq*}>z|Rz>4+Yh4-rmTeu|0i0X9De 
zaSVtkIh^r9K{6C=^$CccF!*t5b>?Mp2Uy5PRJ}BCVX1HIXodRd4zz-r&AnlQs+@j- zkY)x+g0kOjv-WS}G|#%e79p#Jjojt+OUx={-Jhox{m0c=ThG{sKCYj~qFu_PKjaNu ztk)JgfJ?YkCWMb84H9~%DQ@VdxVyDO z1|HwTTWGS-ATA8&ZunEb$H;3MLpbPQEaf>?bmyihYzI}%d1n1fjMWfn@}QIv$tWil zRFvddvZUBB7;rDiTfuQt6sJCI_X))!K@@G>2*y~sBaI@2gNYOic3H$REE?OcyQruH zh@&5Htx)lF&UaJSgez>aolteS(MmYx?AxAnsEwq&;<00!^?ZeANuRV=n4QXt@y|Ul zb-!G)8X$Ik@IRmg;5b5#ML}u4dofZ^t*~+@i@zlD??jyeh_>q3bCeZ0k4=EnpAy!^ z^Uu#}CdhkrBq&O~jg~}nYGh4?I3X{h3tRXYRj*YwNKk!W|BxXsbtN>n&7}wfa}Mt& zkD!2Xb+-+!h<=5q?y%cz)m^MsX?%qXGXEK-$KxC}pvc=9IsE7>*ni1N2}yuLlLu|l zfOm0`BN$i{3(sBlFYg*G3_VxFIHz~Ko)6_g8wq?nuut%N*V#=#A*m~l#BAuX28%#P z9oE~2KqLf+G$OqG9C(=`GeUW$_%AD)_jS@>dZF{FN5E4?d-?V&Ez?NBi3xbAy+SAt zU>0pI!G7qB6b<+s%p~j!9s*OB=Dg7SV&hkG=!fXAchYxART|!3-?O3Zt^;z-q~pUS zOi?Hq#c1il^6oyhq=Q=}m`4I}&}c()XLg%jXWUWZ!KlfJZf}o7AmNkU zxI(IlR?#{-mqpV}?=;zQ`1f7!1$S)U@i=4VujGPklNzuZqbB(X#4NWnef!!v&t`~uY6yQ%Up}LXRk{@m^6@$4N zmgj0v_BEcGO;zCrY(K9r6;!XCrMRGq3N898Y7hx!Aovr-dEo{^`{lP|vX62$IPsUF zn^}6sh1KlV0FHLjt+YN93l;PLSl?{s5`ZtiDw{ipqz(U3lUmV@H~IOt8tKZZzuLRG z_y2<5J+(;H-5I_S^!^b0*ZP#YXy;M(F^H@ekPTI+*^0TUGi(+Z@EKgF(OcQ-SsXK* zY-Pa6RF?UfSBZ(72+!&_)+4R^zMjb>!8zVN{)PIK>Ys%SCrL3sQ1wVcS7(7Ez~D#w zyyK6S-fWgZ53;FziJys=DV>vCPOg5i3r?9BS~-px8DB46&80rS%XW~Xf0%#T+YmW@ zZF~zkm2Ac%xUDWU*Tvwy$NGPyrO7_VdYOsUJkQYQFf#eUHYK5EczBH&a=`e`IodK{ za~7|KQ^RDL%b&@<BmD}(d zd35aV6){Ruc-yQ*Dm8S?*0o51yv4d4%5E@)dd$~7 z*%`9zgW+`1gIt}JZqTh8onXN_htY<<=dPA_)><8c>Nd;fM4dOcFX^{JGwR&>n2wM9 zN1xw)<|?jDBf{N5R2f6p4QuD&psunnDQ}kYc&!2k$laFJqgmS;W&lKWxiJ;*up2^VcBOvM-iB+qj4q8h#=-PE=d#!QKfgj}~%&ZkZ?JQIPYhn)pCNi`7O_SYlk#oS={5^oLG86@+#FXm3MgZ|}-# z%9#2Ve9QF#HzEVV&h(M{>Zyu|QFXy;c7H zR0UFDmL9Oh?<>OVIM~sZ$S>76w^*w&%~Ot);Sw(^Wpu0%UCqgG^I(YD0697YogO&ebf=vTQwJ3XZ8wS5`=`-~iS(;>(ph-*ih2!Xy z@jL3p#cFT1=tDh-_GnlfO3LY$0D2!agerR=VbnW?VVt-B2sd#rcgeFPUft+GH~Ks3ZgK}{vqsGN1Q2_; zVjRSq%pWYHn@f+V7d zT&G8owNp;t<3;&n8hLDIX{utHeCp+nwnW2CuK;q}ew4?ym}@Vl?(=@1fg7J8P*?4b z&|AoABxFAu|NQ>u3(OTzH}>W@r>#c{gDX~kI2a>Ex6P6@8a7Ihy|$!G-Vw^9aW4Nu 
zIr<#F<<@{>xY;LwEUv3RLn~|auf)`;-%ggdL+di@?i8wVqW7Pxsf4OQ8okd23?IY) zUi+*HyZtrBI$I?p(U9dux7!8|@`@H?6)U9|t9g5DlZ;ul)JO4!Ku#cN7|W)_*5~0P07dv+4qC4^H=jgd&#h-?vn-93YAS*rmLvT}h(4>I8h!RX z2gsMnd(6jK)7zWbOkO+BZD*6K8T+d{FN4TJ*)EK8PHYmhnAc?!oL1ga|K3uFevFPf zQ$Kb3EP0wF0(8euF`iNzNXFXH{sD9?vJHQ&6wDGHdN#vf*fJf0!91`-2HZN!Mn%z& z@o!4JP_9<@r$0$3qo#KCCAII`DEb?8)WwzurYzVk8Mq1pRXXQ_K>yAq3GB{ zOfs$H_C_MLPk7wCk{tjRLmB(Xdvqr={LjeOf zTuG?tyNl^r6dbVs7A%w~#ZV)QWEmTkT5M_+b@$OOs3Y&4`3q5zo*(0sIdmzosAC1` zlzvkW#F}DvNxkoLg-$(YRfT4(u}27_*}o(Ahi(v; z=rb}~QDZ^F1^5=o5Nm1QAUdAHz!z@y?K9Pde3MKQ9;aOzOslXKN98yM6MUdFe{Jb3RNv*F zLbpYx(BDYXv}pDknUj>17gR1^Rv!beJGrUc$&{Be zesz(bzgON_9XO2o?5?%D=IG?kEKXw~{vTEFz@~02Xer9-v&`ug)T-i#+c1W+318Q2 zcxd675|(3ZxmcH9FOG<*xNL#&jtlBk$&aZn@U!>&d^m9ou~pPQ{pt)qYx=rcbG%`7rQfaMKM*$>Dmo^oHE+luaN6{5gnfoE)}5$6c$>l_BrA|He*f34n0&5)GQZpOzZ~YGK^s%) z%@~+?V1>tZ#ZnBbe-K~4L;P~zj-*n!7aBD z1&&Bjr58}w$(7uvJZ$NDWD-KM={^p6kQ2N2m9-b^zg|If5wh0*^eBvWNvWTiSH2(X z$4a6l{gs&^`5$D%znO0% z+mQ6LnxdlnuPEjCAs)q!yT7~a^1s2_4~R$lrDRIb%C9l#W_ETWZJ`?j7G~TBdrDIs z7>U!RF8wlv*xEnvdqRO#%-5q)t}!pN~fL?z$eV>Pi2Q#)H&owBh27a+kr?;E@U!<`S9;`{r{(2)Q zXx;kk@FFYWtcr??A!6LL7*9O`Qd+?{YS(x58h>}zV(}&0RQle2;Hn!+W^3Q|JE-b_ zj=DwICc$r!RB^S5tG8ScX8-URSy{$C4dwYK8#gD#L}v#)+5*RCW@hq~*FGcMNPoMD z(MOaVWiYD;DqsExUtWJ!n4KS^&T&9#bbs6OXX^a5(d=yY4#OI17v95n(b2u0yVIc1 znwXS?eC*H=Vp$2Ujc_8qlZv8K5T5nzfL9z9T>tt&ub*K`b|WJK@^9s(^qomuMjQYB zNTftu%Im8B-vwa!qh-&I0t+`N zCDr8gm3-x6KZKw9|4fi?E|%Jz#cMd%))>Zq3c+M=QwUc{O=w6u?YRrqF|?b!kY#6> z_fwRU9KUT{=x6h#Z|GwAK8lDpz|@CzHYmg%A<^`q2<^<)f)|~Hz4`AHJq9h#f>tCf#%V?q;%GB>>ibo7d&>+FMI%74} z!39ay22vkEpUaP#bt=kVtmOh#18+7r=<4UoTQjQXN}rhR4`iKuaZPjJl2Xv*@BBR* z*~&(&SLlqm4I2z<6=7vxm#L@7vWER6hS7%e!xd-p`q}rze+)ibt)$ASamH+|HDq0; zw>}HjzRPRGYvg(W7I;V3%bbo(t8w;@43RUA)6}gJr8WXXc}#+==+DT%frPB<^1XPr z_UrPeT9fT{_dP#Xstl(OBV+BSJ|+0nj*U7GmfV~lId;2ctg>c)`7y6mf4QqXQg>#$ z>D9IcE8n%D(!i{gFmK#gXqeLY^0T7`UNfgemSrnPVntn6E!PQiV|nh#*E*ePX4TxB za&O6OO%$mnfV|tvH(+ccZ^0EL`RuTQC!B_86fCnYN0t_is)==d%emD3k4pqBKok8h 
zeTLfUC;%Q%qes&ETQEJ@f~G9pV$x+B=3Uf!xqS{=UoAy6LvK82Yu4`%*nAOyYc|AJfEsGjm26me&ZEs-nikFM`e+>83}{YkLjG`CY6HQ zb4dQ$(xgLMmdIaTU%X2zExI9}$0}awoaa--%ceUjZ>q;8e0Fs@FG;bvVks42xicB7 zrPV#}lFF`6{yP0J*O9T<>nN}_#A7ObpY`CtWesN``TrVf2oZ>PEhTfAMlwUFNdQ>RG@+=B)$ko{tG+XLV-6PZ&VJwDxj&SZTq4Vbj zz4`6MR_g?XORV%a@k*8Q#qx!zdOgEN0!{|Y9GC|M{tqX@GHEZrE?4bfu7puT?v?wb z{R4awUy<#%@^?(7Oa-F4)RtwHsa(+a%-Y^A(bv|h?5joK=hx35q=2UUG0`qMC}V{E8*vA)<2BqZA2>gbahCClUCO*82j|5=Veh}Ao}66 zxePh4kw2#!d?y0Z%F9=>Xspom($OBb>2hb?iRv#FY8n|1-L&b7jnX0KNxtdVyb|E} z50nh|8Y)k6rj3{zGJ?KN&GtOc#?^Q$snmB z2xaU8@;(&{36+dMp{^YwNXs8{qaWIkf?&?2JnNH~w6~ABJZLlOJ1v!qDjXh`_o~k| z$gK$B-Eti~-RH4ZwYQP;S{vGOq}4=f6;+7j7!|<q#fnR+1wMx!&g}b^iFc*g|?(CtXp)E(BA?g zO^G0|Kd~EwJN+NJzC9l5wEchDwmiFS*|uFOr*>1GN+?AL9k#3zmBSF@5Rr3^lhb^( zHa#V5l=C6UX$B+XtTCjL946Tz43iv2;~2&;WB6U4p?bdG*Y|h-u`51@`@TQ-eI4G{ z`?_vNk@pa~)5{w0Hh;#@+2)YmXJZSD4J4Yi@G8b2L(Ghqx4S@oe&n>SCvn0i#P%=> z;TKI%z54x697f8YGjT+L84`Qb;N5#DeZ{z-Wi*nTStc%XrT577_s0Zt3ms6k0zSyC z6=gCI6+cG~0D0%)D?Rs!Q^1(S@}yN~{^9(}Kwez}2Ql9V-N6ZFcw(Bh@U6)A3eB4l z0AABtSZ2y2Z8A*I&aSdcti&W31|cM#+)HMN#01Y&Qt<%(1a~I#_0ZB2KC}2O+RHwi zR{sLqOgXpE%|uqv1)v>iPlU8+O|A(+qpGpO|ZB(f4~$ zy&BFAM3JE5T9bt}8T>AQqPkAeSs8p1r<}ybc9PLOiX1VB5O)ADcl*&SO&b3N=QeIWvxP7Qh@ii!dvULCz$3;(~VGg)VvqlN@Nxt!CJF?g0N4GulJ5 zsGyZqU9FWyI{}1I^eod*OV6GJceFp@<<)`5xiX9_`?U1DG)pQ?Gi0OB8Vpiv*P|&i zuGh&;W^dU-s}7tAMLl*vOH*$4$UB*6Nj=y1ILX?WN(R)atNgI$w()W-z%|CBri?w{ zL`TE-3g#|yh8TzyObz5^4$LsfhZYVuEL#W(+*}vO)nF0EpiD_={Fg zoVR&0~B63&YVb}fBu>if-QCcFZFvvNR1C~b|d zGfffeYwuS*y0DFTae^`O?MMM50B}*(h!Z`CRIaFnMi)n0Iyhnw^b_}()q;(MHUP=K znK6c1hpNVsHcN@A6jeQBZlTjEXuJ$3DD1W7F6>+8CMZv8I9fzhTT@2Q>P>dz$LHLu zM`M&j9NFG;rlqc|!#(Wbq{+BV%9>gu{a$fsZQuXuD4)m@{V@k9k|GWB&Q(> z6esn)mh;_lO5VCx-JL9>^TO9olU8=3zO>E*gSSGWC_ln^!mpyzLjpG zJ)7Tv3xxQHjJaVNejMR7N{$PfSFi&;?e|a8ZCY!HHM1Xamh9nSs3TS`h9g8*vxNMe ztoG3wyGjd;?Ut=r-;pXXbGgXmvIrgBt?{f}6~urFudvS|B!ruEAF7{20?G^`>49k3 zO?-^}{ze}*q}S!vH@17n3k`{_xdg9gW3fcJJVK6Ug$VFB4@f5#ntxarXJ|04$Pm8M zfCqp?cRb{AW5H`f8{liPUZ-vie7{+=Jw1MjNLhK 
z?H}zRJkulllIbHMyKIaKGK|_CyqT^UAn)M-N{d>)(||;RT=|c%V$`?cpB^gw5j8$B z@68fL2)y)-eJfBw(N9ac5Fg0>&c{iK$s#pMs-_?%%<~w7Qe_jiQ9|WOzoKp|4kYev zM!K1hnE()DS592 zY`!YQ@AV3szi`0t1sqJVQ zw(XUpmHZ{nB#HewbY=UmARtS89o|F*Xg>+9OigfxU@40Z{xSDVyOe4}y)5!TF~Ki_ zBqt_qa8I5yw=oVttfBd@khGF-K;v0g6UEOZXcSc0^?w49WrNrVFLM*F45KezXe}I; zdQiHw@Y#6}FA5To5HM$Z4mkEy4D1CnuRWHiCEd@K&A09uJPfaY_gLh+r8S%BUQD$D zMDw2_FLd%N9(d$hTT?Rwd4F}$@(MkC|CGW=-`35IBDd!2Jbljmj=?k2wbn+ih|tfX z5;@X@g?4q7_wCAY#syb2H4K`q`}j7BJ8VXb$*&Q{X3Zo-j6; zp?F*SNmR^@Hd=8*7^acd{{#y@_d};WH({Zlp>ej^J+GB$m>-o`MBhRz*cY=Y)Y%s) z;$JOAZYKiBqkL4y^Vt$1Bk&wIWeCk2wf!#L#rMq|+T$Ssbv9KJY1yigR9*d1t=8R434JT9{uXQ(~X zt?JklC5nkWQPi>4EP?_dwpGGlKh~+9Gz%)ZYPsMYT~Df@VTsRa-M3{UCe-W3sTo>&Q!H@&jZH6j-Rt%7^n*`9{w1w5XyD1%uJ`Z&mo=TMCFY# zf7VHNUVT!X?(8dF<@D~z4}*_6JI}F}J!|_d7hi(?Q!9^H;Vy++;DjdpT2}hJ|yeyV^a?32s$pAXJ+HS<4)?sof2XDQQpQF3L zLXl-FYi+lYAAcAgV3cuQ$~F2}?66bqz9jses{jGf^)pBer~1|=7=uBU&vslz;4RMU zUBe(DOmJ4htK4ix^!nPL0|PTVpL(j>O(YXNEh}U=4xTXJC}t|r)9U)7vWCJ+wDR2% ze;(t7c7)_3fF()r(U_`#fSe5XKE-{^v0&^YrADObW`L^GBFYg9ZDto6NNambkljHj zSg{BFA}!)SvFU``wMM#*4Xrsp#H}}bmpbX)Ni1{IA_{#d4F+4&&&*gxdH6StSPDye zSf9KUQhk$eY*}+_U`(JpB^TaWbY_#8B)V9e)i3>Q=h#mmz)~H#vuJ}!PCN+SH;}Bl zp1H?RBqxl>g{dk*m;8_}?-OfO7@H0)u%DNmzUkdyCWpH`@fh41F=8inx#TA_IZ*8e z-ISM`)-}Qd2@EYX@-pvHw50Oypn9;DR=ySU*S`mYy}y(Z1Wp4_YGzTDbjxJ9jL%MX zXJJFKtJp1NEuis~MuG9^=3=Q34IAc;=JoE*6y)vBnt|l-*1GMLBgXNwg~(#z!ejC*DtT)QK7Hhz zx^;kuXEcyl1%0%n8|Y&II;l@PC8ul+utz8vjcJEUh2-;JqA^8(<27_7a^fO2lW)UV z&flcuMk^vi8I9k3eIqFywdjfnUWwujXmR+-pmK`qmCZ=10s zr5DJWX_KlRU+|*Gz8o8^W1J<);j;2kzcV6v2XqTZG96(>hh)~4b)f$;c39n5|5%Bdu9Z>VKeE?MH(Adxv?t0lDYUR;)>EaLl&U~Ss#c?C)j7HzIbSV5{FE+AtH7L73% zzr_(LYlvbKd<_Wt0bSZzvGVE;gL>1^e5Eut>T(7GTL zzJpPO%_@YBc|X;AUii$eH4WEtXs^J6^X%^dk~`x8>c0KlL%XJRA1R2GNYtrJHdN8Q zzig)MZa0xbbjAL*0~W)2?$}KMIZtWZszs~9^;hO!U->HGUJYj=qH};)dwS?7%Y^#4 zjT_@h>+qK@4glJQRrmmN3segtb{XvRpyw{+HTm1!s3|f&iOvh7dlKX3kwW)~( zGs=N1eTv@vLMIO2TSmeoc20~29ekg@R;5!5FpwLxWq6|e8j{0_*)?2dV$+`cUJSG8O6Ol} 
znmNt{*OxoRJ}+x3bmZRPWHHw5+YHc`ZR&!-E&L5+T-}TgY#?fo>6Vqf)9%7caPJX+ z5^}Pq98d2^Ox(H13<)ZePU+FLEQPAi`z?9<$%Di1>!=hl_8h%chN)YPXQf>s(+;cS zulaj;csMTKpjt-xA{0wUc6{<$5TBV1Zpl>xfajS#Kfu_wp?^f(iOz6g|;3sP(6|Wm%vpHiy+Jn=HJI8t|p(1ru)u1s}4(Hd= zD~klb_|zbWx77%kv!ePR-NV1`t_Ug9Ykbj|{YS`&bG}2CwZs2_qFvz-o{9e<%LK`A|p(f~RdHie& zHq|zAHnrK5h7!EI*U3VTlb@(|3%Xot`O`1kk{oy=6(qX=`bF9EAxM>G*9j4r`pGA{V8SxQ75F`9aS4@;MtQQv!oj~PDa3Omv^UU>Y>Vu< zN??YArFB>N2plE_*1dFrb=#ER-Q{9_L|Z9ic7&sjT>$vR_Vt&|pOT_PB4h_%@*DWc zN8EG%!~J%maEmN5vv1~48%&(qNByx449o2jFZY`&V*O6}d2v&IL3>ct(Pv5Zd)7=g zOD7UE$gGt~m~ewh7*Kl*Aok*Ds)vYvmoF(s!eUt$Q!Rl{r%jp{2%Da6`Uzv39ng;g zgwSf;sxtsH7hidLA=YNS}@5`O3|FfW~mO}meFOkyVstcwq}p|5%# z7q0vU7*Q|oulo6;ba(s0R1qoDaMQpD?UEEAEe}&)**wYjYFl@)RWYg8xp9ccb1#UBEXp190I%)$W1GRD|%Eu4~Omo%ET?BVyZU@B8 zsuhm58e!9k);8mp60gF&O=ah0a34D9`Ac|i)b3>~ketvGWW*Y*dTLN=n&GCL$V#;d z0%!;cEfZ=6~;hYX`_*Kkcuw%eC3%)!6G6N|*4g+sTT<5=CAg8O~34W)!_c?F=>N1WKZGlICQZVK>@@qy? zI9nJSh++jRgk*4=|62%HaRMMpcnxo5rkzt|0}NET!waRia?Hpi{{nzkF&)po&cCgr zaQy1I;viq<1)MfwH@6uWaa+C*-8dP`kRsi}%3ZqbfvulBD|glWcbe+@InYE6Mv|QOYMeo^fIj=v9fjDO?FM7qsk83cc%4fnqlL@vW>4!us zF>c@*HzyOOa+V_?lF94shv@+1gHK+j2H>OlMiGvOE~Nyd43DL~DXL``?;1AD>oW$h zA#aI>x_v(c!Yp|c>L<0?SWAUw50VGYQ7P92SUTkOVRerW*2lKdY|PEySqkS$!t(#r z-TK3NWovWuip%J1JbU)c9;V6&=3((w2_^qA5QC0qPOj~=4jT7?gVLHqjH62466^^G zVaYVC?)?X6F$?_-8#aiiBqs;e!QC{Rxp_kAWF^Z~`?8%~zht(ILlzjzTvO!8xU|a2 zId#QRjT}DZ^NNh~TuyLBNA3dP8KRN+OZ3z`$vOD1Sa!CcIn%x|UIJ#YD&rcY`=PeA9lmyQTBIjX@6I>!EfRqxk z4c~uLlJMA0zuzFB$+B;{7rNZ3!_6}a>4ia-Xa~(TV?9GS#_9Y{^!qARjXV-Ee4M6y z)aE}lQqqNE>LxY`m*ajZhjk(ei-DGol+t@%NZLFN0#cN*kv7*zx#InwiF+;K&InnJ ztv^gG+|N0BC0oa7XO58;ra2+rRGBRM=Ix6NUaqVa)qdl_6}6x+QM)>R%i#2q^D-%? 
z0UA_$Od_(QI|fLT1vS-G?2*o8d+r3ss6z(?(muNtwT!|^nD<9;SfL<1>tCG^QKE9P zIi6+^=TRN)Wzh0v``)etD4Knkc~sE+U9|H|AjoD738IlfLVy`}F8s>hHl?d9#n3IK zNOP&oifk^L*ZJ7tLfC#BEbys59BfNZZUIBiq}-HdsqK5G5LTTl3VD#{oQkWuE64-lwc{yB%A?xUD4YH zugol^@S`{^6J4B4%4H=G=qraDqJ45~$>wM&U zozIcoGF*neMcNsG*m-yuLvaSG<(kg759>;Hg1kM=qk=N9B%U;0%?o0d-VB$zK2cF$ zDuX|;0r0oB{Yn#Ex(`5vtMt`yhB859>aQH*jrM>tT*gt4_VEXcjh1yb$lsI=G09=0 zquVi3?;i4n;jExO(3=_zI(CvY$NVH&mR3e7$Qiea3`VVu>o#2GKthnepEdUmXDw;l ziXg5yE2#FdpnpK6$5!N$)wS!kdttwTqDlPXR)j{5ApkH2q5K1WBqxs3$hZQT8KGU2 z^TIK&)-)UU0$FR}3*s)AVzpwniFiSpMXIGtvIl5gXz%`(@H|4heiGJ7xP+wiYP#kM zS4(t(T5)G&hdetv3eh4ODS`?LG~q6BrludQDY`+;>rrn>OLv47Y`eR+y8DnOVtNJ* zJixy%{g3BPq@$k^y-b8hpg);yB;Itsg>igIB8?KsxgOv@TmV#NCqo=dtLC@GwXotb zqjb+`GlsGOs!kD=*usJuV&G+N<#!aGcGNcXd6@Ul75lCcKA{%o5BrslSXqUNrtaFa zFV(dTv!KDECEw{+dQ{V&Cao~3giTrY@%$jnpjv=xB5^8 zYF&0<&o78ydL0wFO z6y6G7rxU;b0RBvU~lfXv}qZrW|3V0fV8Zo261H)=)G1}0q$bITo!H66hM#TL0%)VZj zx_WNI zY&0f{8EAd0>NGmIIj(;*Lmd9m94ZzW#lp&@IsTaNtI<{&T4mtWp)gD>?g*ZF>WejA zG*?)dh8r*JJ7_ltq4NP^x!LkHKYP*Ai4QJ@XoC@U24W>dLJfeWLRTmfZ7w5TIB`KZ z!(pA%0KWj^_Vd6U>oRpwh<2Z?Ai?$f8QUvBM=W%{WjkyT*el;O6Uo4%J^DM|5VXd_ z^hAGH5zp?uko>xUyQ>Qaf__5Sdl)oN`TRae>)5Z*1;hdSx zh2E;FHq85ql#VIETShw?rp!-5j%q5IAQ~+@Ftx=`Nx)xhhul#TLm+oa~$B=PNn`JY_#i?rV{+d>wZ-evV18yYWnnxU=bu7$#P`if`rT4jH4W?I@=_ z|4)cBS!zE)c}JCQAjuWN?6(kva*}K^OioairgU;^q6+UF=W6|*a^U_#>e~jQo=Lf( zNGPl%JnWXjBZpx3LGJi?A5WWI)rp&SB_D0$A;axy0$lq1h?8tdOk!)$t4ge}&jy@p zUGv6g?^fgt}L@h z=}7sWsB$t_4XZ<#i$Z$!WPt*IDOt@k$)@qBY4UJf{C8&Uc|B!;h3Xhp5q$<-N)QqK zR({6Z+oD+$*m6@qy`jco4y5Uik|xd35M%A@3McNl%$d>EM1@)i z;`c9BM1tL;R_w*o8~FammHIP=UttPc4INW$Qa6qYg#tXIhQV;PL&KQKIymb5k>QE9 zq7rt*B;~%IiQaaJj+S&(SlX@&t_k#pu(uStIM8O+Q?Zea`p3+!r~{oZf7u}tDk(pH zSpl`)%=6UCC2`(o5bmqCg$7{!lyqMTDblRUM{V6(RaEl{We7=^b3QWIRn#$n4ieH} zr^?eRyN}yakN8eq6`PYu^1tiHe!o<|ad>@u)DtCX4PQa@+i2ND9;-=4QISa?j4tg= z^(IEQNVc@JoYOi1Gpn}X=|4D?VQ-_U8#tH{-6VrB7hD&v<)m96aKdqrafIKr$ZN0Syv|ErR1rfVXT?{8X;Qc@l1 zHBn^q!f;kHQAfjWVjn5UY_f^B8%9zlFk|VC|5#+!w-Ao()AAn4gzir~{Z!7<9tXB@ 
zj_&kL9pOD?1Fe@;vsm&|-|o#F4^;6JN&bHkTe}0Itp)qy(Z9_kMq9I-bT|VCqsT*_ z3YVTo*2GQN_I%7S(1Q7vCV%a?rIp+D63wHLJJ252x}mjaR$Bf|q@E!MmT0wJ1HHue zz%wdI%nWNH7WZHiSilWZgvo`mzbc)~mMi-sLTJRsQ@U_JXMqSzfqmI48F`pEJEo7E zq^AMo`hhL4CXFThZ(9+lXh?1vIAtolamHDqJwrFQkBVa1=MXjIQf!K@W}jII=nhp< z12*1FT@fwbb4^l-*y?I7fn#LK2}cS8mS2nupJy3wDD9Kh3C8jPjucG08_r=%6}bZV{b z+Mwgy4i~+{wCBuNqOM#j;>6VxrDCfQ?eW3(orO4=icd+Km`4hZjJD5>&*4+nEl*X; zb_->%r8@)1*m@~_dEzXawXns=r6Dm#=>XO1DX#$Q_wf|gj2o)gM^Zod;5A90z78yE0Lmf!znq4feLU8T)d#fq5-JLrdrU|51_%Z?q&>G7Lwni^hoPLE3< z(!o{}wGH2ZqH5aD(7<08@sv`>t!C$H@*ay;pCfu;A3)v4(asNo%ojW((KDMwrOZV?Cir<0 zmrU!wNpFfv=($9F(tS~w4uLIQ)O!^fHiMur-y_%2@`Ks~VuVK&|6ZT&3muVR0?&{9 zQJX^|GY$BQ?yN`Uc*9-bnDP(h8rt}-)+8x<^u?CMP$rBSO{ZP=M7x-G8*}TvqZql( zUtSQ$>H*6-G!n`H9A<=w_KRa(j;`=y>rtLv@A7f+WPiv%7dT~}-E@nm=BPP}>?cH>kajV;g?WOUV({RwapB@(kcZa2hJ>CHXS+%bjBWV1 zw2%8IAq0UyI23=eG%#DM^7!>_mtJ>pHfPT8z)5r{&8&J<KdUQve)?pxIKr8w&~;|s zY?0*-j>gTHn|6jw*TwfR|4}b~3%jHsN3N@Cp(HcMLqqtgD~C1k##@%;GCJ(8$7V~e z&+*innL2Y+7Bh7Pt*^3A^i>he5xXyrDrO?Bo?#Ou630d*zRvVYMm-Srq(of()ear8 zxZ87`z+BG-#zswOoAXpx z5&eDjVWZEGib#>B{vonjc~vju|NHZkplT6&95(($n^z6L_9velGnNVNQb=Gx+vnMV zvPV+opB7t1%qyM9eG^^fXixwqIdRnwho5cvOHSY{5kzDnV=BRS$(}(**DEwoD9~)V z|23$J6ZKv%Ru-|sm&hHkxk9d&?CuJ=HNyjtu8i}Vvy<^;OLKf$$C_|tq$78PA-?>)NIhr3%3BWM^%2WD_Pep$3t|I3}nG^dGUS(nBOm;9fN zMGzZICP}iW^84IEdwP!!cW*;ML)(x`!8X46nkuV`dhDyBRtfUR>pODktVU_z{K(>+ z?PyguQ;;#fs^0?5tU#W z>f)6F{wew=+ZSLqez`_!yjuPzOU4uD zTM28_u{5yrYW1DYmb$kcUfET(cY2PFe|cu}eWSau-6yXRx*(y+I^<~};y3aw6;kO#` zoWN8OU$--*EjH1f3T_((e>evLoJSH85{4G$=E(Io`5}D}6*@_Z zlGWx{pzo+HnGr>O1ZFw(ZgxHgHDlxC_!)9Z{y$)9nD(a|pkfdpW#|YnH4e=6vs#^K zp&b4}G^E(SV!QsAHno+I&Py{HFVxYoo9kkTn>1|q@VIsjHm)?y9%+cuc2(KB_!dHR z;dQoeAfp^_sMX+-0GZjJN>uaXZr;3!Kc`iHX8RB8Z8VX0^|Eu>pe*-fTgX7{X#o&Q zdb)oUO0%fVp=ZEtiED5@rvpFLHcM3(v*=|?xEv5SUa4|V_km@ zN|E!Xtw)Vcr;BguEAwo+t<#rX$9(1ue)rwaO6a?O7NIT=LDzuD@$=DIL3o6@49YrS*hJg2r?XgwtM=~|yvaCOZlFa!(@jH`>6?)CVPy15^>c7#P~S{iIh7CLQc(11 zIGMst1_^=;sD9y!)dq_bnyMvJRUI2|wUjMLyXr+R5EJ*P@n2SGCGrL0aOTk-%QB_W 
zfZ%lMoR5|V6KOBA94?pX@?CIm>O)4mL*#T_Yi`yB=?3%+(IYKoy31@b}Zh?yRm>W$L^sx(P1)? zXZ;oe9~^9?a#NWj*WPz*P^z%aUUHMf`L^AwvsKY`O{d&D*A9uWxNhD4PFgm1ZTqJ>k*%!$*xYg6nPM3i$knkRB)hiK8JF zbW(sotFLmDc++T%S?b#9ju;8Lb8wWD24dBa4~?dxOH;YNh}G@iARaIkrXXAtDhWHS zm;WQO0}mCxy7qr!F z`!QXQOjKPsSlvUu=!>s1GV~LnklzkWhB`nIjfLrL3(?r+pa(urZuA!Ox;-lR_U%z# z@e?zl4Th+@j%nhh6U(6l(PW}GMDjtDM;=2nJL0AAx+R_{W&};u2xYpiw7Yn}`VVRP z{;dARx5#%CdVV2Mi5{_DV@*$4IzxMM`9IDL6=y9CyVS-}cU+Ox&*5g4-d8YH&Lxxw zo+EUKr0}&z9@~A!^F`KMO$v2neBq^}tZ4NpMtre^Fy?72k`5DMH?W zAx?&dXFmHcXO7c#4b3!3qU2|@+NJx(#F`$`t-7n#eHn~H6h*uHOE z)bS%;dZZA>HhhKu-2GzEzxk38leaulR>NV#|MeM4y)68WLrtCG?pnp54RQ zpUnod#)p>@{Qngii-gjY?Z%~veWuhB{s?Mf+=^^fPMu0HA}(2QABNk|(Gj{jBi#P) zROw_LWAH}Na#?6IycnVW$mZz<6j7#ByqXIC>>c-gWo%$D6NBfu?_BVH474g>nyCz$ z3s~rK5bh7Hb_znccU_GAPS>ZeLDIWpVelV zB#v4idJW>G;B17E$Rwo#9dj8A!(-@!2Yf36hp!5|R@qPaPS8Ha(7U!R@~X)$pL$MMcBVpP(69r*9WwHw{Fd(g6^#Sl|%-!dOC= zhZAt(bev3dp|(ic1#NdxEDWGKW~6-~x)Riglm;s)>`GO+BDb$}AOK>k*k*vKU-i3o zyRyMVxlk!Dt#rM}I48Dxnjf*^a#nYILX6i7?RfscnqLtHP2Bk;zksAmLtBJcRXM7(HBaqeHafw05| z@I(c(fn&(*Qh;3F1VAHyy|1*&`yO~c-1GkI-xnL zq1^ub1D&fV2WfQ!h>YE9cwq>41iXlj?uzA$p1<_ebaneq;AtnCG(mUVUiPzlFw#a^q_^gP77;X>H9fOe>BHm(g) z^V}gd4{JoWMl5?vRz49X&Xsw3c0es(4D;?^5|Ue6vFb)UMc|`~;Oj00O=uwo=?E+f zVF%dQrqrhp)9YYFqMr*6CfGZk$L|AHFoUrLEsBcvr+D1G;#Z-}%F=nkJFdDRb2k}@A|9C9PgNOOo)>AZm*5rIZl6NlL>lX_L7(FGudn+N_;VW=HW1jEX!~@P z+qY}j-$d68cZpivz2d9YHbPOd*8>THL{4L7SEt%yfM|rZC5nB2FO5AhR=8L9P{(gv z5C4PVl#n-p^aj0N>hU;Q%As`PD(wYJ?QvWmrj%zTzeHHxJHJf9v-`bR+|*ju&c?V& z>D|vYUVW6`_4r$bjB`Ev{~F?~DJlB-((6z6FP_QhaTm;nmf+Ijax5oK2xj|7E>>mr z7E9k$+V6Ja#_qrV9=^8Y+Y?F({%#6U`@7;4Eu*s9mJT5H`9EYPJ~^Glx;xAbObeJz z_SE5X++P$U9iKwTTT}iWUpDd7>6}*3uEraU)P1jqyq!B=l`)#I57_06?~hdPwW4%xO?tmaJKT=<(=oKmtLi=Wt3~-qFRMqK+P<52uBRiEoOOlf@;<_uxG(w2tZ=?KMof_XJ035RWcYlnZv7 zs?(dmb+%gTAN0TOFk}#rUE;btceuT2)5cb(=#09IM1Qb>f1(ug6AI>ZzC*vHZQWNy z4HMXSLmsgq>1?lb>);OWrqCV?*2$|YbRC#M~wqH0lG+u;Lk`Y@n$3^+QS9vkSYGkeZ9eTG8XMD zMnY7MNc1C2Gv795)OgQSWo8|dBR_fOV8Y-|ee3dh6g>)6Qi&VfrEdsl!ZeQkqCa6& 
zjDK=cv~)wihdbHP&M{3=1ir2_1*lit6~Uf*t1H>cs@BHtNMnyQuI=OwI(w z49@KisyPTJ7FaR;HQ^NxE${B`V!Wrt#{~b@Sg+s5y7xQUv12qMeut1Bl>;Ypno$L- zl>6F8+xDV2t+Zn|JPe8tG+xp7a6c%BgNN7Lb7=n!4X(Mr4O@mu3II2gLPx^WEdKG=9q z-gTueSYX%hdv9vDoC#V_Qe93ORE0CzSMWC5k9qhVROs46)<4zDdDv?rd0Dh${T-3) z6Fv9MPXApCCt@sgXQx zL}#^829U7x`GGuL?)RHS-IREKzU@nYF>wiS#9E1b{kh?^_`y`hsGOnq%w;s3jger) z!b-uV4a%)fz-)G)*jrR@@I_}J!spCf(L^iXUojufTP;^9&*M-35u+ucjQ0M`nn@!# zhLwJ1`dC}&i;NfS-bxp5(V=gr5WtG_P0IO4^a z#78GFv;RV@MJB!P%jGTd)(oJ#^jS^S)o+D*BC% zs_Dva9`0kv8qj`g{Hf}uz~c^^R`J7UTy0n*)ehzhxEOB5393r z$Xv~zTl}Ka7aG%>-kcw5_%~~Iy4=w%7^%ArN1h7sKg;~8?=37v$_LkBpB9d6#q~dm zga;^Kb>hghx#6dRahwMP?0pO_nbKJJQF(D#5MkpZlrrQ-L$ z_YO=A+Mm*cS@3T*W%_vLguyUTCk)yfQJFna|5yG~^-nL}V~^&E&aEMXWEx)h3fUR$ z5Exihd#HDyF35j))P|~|?>+J+<8D-xL`rVZ-J^@@dmfz8kI|D0IiwdQz30g0(z(c} zE36T7bQ;2VW6hnOQ%|g-qlfrq!T2DLCH}B{C>mbHw2qW@$X7hrSosBh?R!} z=t6troTcSSQ=m?Kc9=Eg#R zVxvRMFQ>#lp4Rdo`?ux^>~=Xh`uzNL=*ipGRIP}pH)k~&X_;A0n{?vX!}gUmw{PDz zFfjO8CKEePfs_MlG|D#Ov|?g}MxmhaF(hYWPniB0#%SJppDcMj zsnFN@j@z4MQB3p8q3%HQ$x}|> zBvlXT*3_6RKh-?kI4L3X=d&LMKixb+*?{NlX-V=_Qy0+G`SaUS4!7qXy6WHYalH?P z=T>@XWMs@LD(4U~GUBEzOgs=fxS*eQ9y+cP|MQBY7vJz-9uzogN)4NhI=KXZr~jk)VLGCux?w^t15;*4q`Cc@!tOpoX|zlQ#M znkdCPlyD)4r5fviN96`A)}>Dso-Rv0NObU&DI&NhF7aTU#K}GS0n3>^K3tA2s90C&-*K6*J z2|ZP!-smLiG6v@&-)JI>sNF{uj?F)Pyt|JBqeRep?d5G};51Si&i6-ic~$*v@K4`_ zd&ls8k$H5kd{?DXqiV#a6a7{F8{Sh8721X42F@LBm5t2W{L0fon_9blh;__kQNIWb z4tLF5-F8tUauNwx?=<8#yPR>~o>i-n`jR%6A-%8LBG-oAR^I!Xob!M;^l5QfLsPxo zVPd34&*{cOPs||Y)Gg5v9}X<-ZPZohx<44T-p4i8D)n`z7o50`m4s+9=1rK6Nl4z) zoXYau3$Y+j(e}nfqcKfh%CUg!=4Ir(qjI1RN$#NYDDwkibIMZM753+1nB3X|1+sZ% zaRKEo|GP||XCrkuQcn2GIAkU*Sr$Aj`EBTRU{zHi>{Mg1rl4?kX=A9e7p7KG$|X&7 zR1o?mY*S%3aOW0hD_bAJ zUa^e^@&Of%%*4E(TdY{N9w9MKS>E$q1$p;fnup*(H^U*;Pmy=j(~<@oh@I(Yit$mE zSM(tRa>*=4N@{E&MPBmp#S^o-Qq8IN22U|Ym3LK=>&EuQr2Orm7qi@B>tP(TkJP#R zw}V?$WxqVloVw(=cbB6N0;g|niIFy(ez4wW^x^`NC@(JG)oD9dVnUddHY}#qN&lRc zGDyF;kRb9d9xl6)b+!kb*=tjNCZ;F3x#mFQGdu~Cf+Yea9Rpycg&(S92dKeE2ZDDJMqlHKHG^glQaI|G0eXQYo&ea z^~*upIhGv-X3 
zym-q21n&~lX+|dNPqSbW>H2ADXTH0bSuXrjKAmK}9Bt9b9W%&!%J$JP+hIbNqrWcu z)d!)x&eLBy{2)w9C+UoHo0WC)nFC{e@{?Cp@Q!;Yn?m%}6t=mm1YO>{%QQM`8ZV-H zV~S>b@54FW@w*m0V!K#>cwdO=iLrX}3Oz_UbfSc|YB&^yx*)^lP7y(luN$+Z$kK_P zUi4S5(uR5_`ltu>m2?Dm+Wba43`=93F5?d#);UBW^zs|es#|Nk1>eNG(_K7&f4tKf zwtIyGYn_yOE0;2hIp%66Z-M<{jr*Q#L6uAm-DK|z#TK^$L?=ljWzcJd z)TPU@+BWwFyTBh9_>d-dcKQ>CONkAxtQ+evsd+LiM-tG7IPzruECWtMtaOsS$H~1} zr_d8sZCsEdc0|n4UmK%L&Lj*M7iz|v+=<;S)hx1nFa=%lFKtf#5o4;~T43$zaZvC` z-l(8eXqklUM_~X_yK_G1n2^KC?4qAoiZsXD<^#d zPR1--Q9Wh_&L{WYJQd5h%W_u{gylWeRY+PnqTm?$NW$P>Cf^4nAueX3}NWV@9s5j_8EQr5xF~#aWo_tP3N_Aw@fu?XG+Z7dH}RT}vm?(K7PNN`|G z`q_gFhlvzrO4>#I8_I}qtv~f<_FiUadj~L|4^o{jF zHAfq>P3{!*b-}c7tTY-^ovlp#PVA+r&*gYohyssL{Kp|C*)k!>n>&>1ytuHKvuj}i zwW|*__LaT;>|4|#pU}NNn;G~*JF+!JG2}Ev zKPUt^a`{r{=7PQU4rvS8D=OG|dH2_3X)fPhPeGDnQ{rD|+@`y#@Lcr?m4U)%^5XOZ zlz_0=9lSqT zd6;yUs5Op~IzaIgG2GW3U7QOyP44_1CBMLs z7aLB{LVj&@jWu|DQHhaj_3@brK`0i(?6nnHdJp5z<&@He%g4Aw_Ibgr!Ac793+*N~ zr>C;yEqj1ZzS@4_5*#Ug$05!7tUmS8iPdoVqZP^lb^LtH{{5|l;un>!unFgKF3!Kb z(BQJu{t<8;PZ=i(f{*P+k%`};k9uRo0Uc)gyl*ZaAjuh;X* zUsL1@%WWB3d3U73@J_k^fJfQz_vqyK>1jT6hWirJ5x-3l`px@#&~QkgIDiT z4S&~N`dQPr(@xQxs;kd=r6l9YfQ!5?R$g5?;wGxg>* zU!ly9vq=J{egOYXPX&c0D}nf2t?8(-f4G{BBh1Ha6ZZWElu4#5{g6v<+@B&PJ0j)M zK8^;eNk4|wRCSG|mYZH(&k;t#K5SWA*+pwc2gx@&=cHmDy82!@EEzGDFx{JXhAb|q zU(46EB`z|g9DOmbp6Au~YIwX=Y3S>af~W+(GE~75>2XX=+Lu!5Orm)FX#8{-dR~fe z^Tvx|(MvwQ(W6?I4<~kq#ef<$TL9ldRQ~e&%-m95GnLihBYt27#bt0g(bqd zz4Lsn&CSi#`_HpV1r=WWuHz zp})t(hJ2$g&Br z9Xf{nB+})Sqsh6O!RuswmDIwEH^~hgpH4ve&-Ql@vLslF8GE30lxNnkDqqV|358j4 zMAih$x8sKHfx)_D@}C`?Wmh}_`i1N6tp16z00)&7ONaEo4vD@j&z#lkUS&K&=Y>=8m3~7awiN%Z@H^K%#P9&(-6mTKL>pu|uS}6#ChL53bVI z(e)Q8V)O0v$Gq)-_Bh8f`CiPnQA`3n%yr@1dyoUuzA1x^0t;w?rcx{AnVP?SX7rf(J3oiH3`3AZGHq|d#`3m?-Kh8Ibu6hvgNzVse7 zYLYk|l(@T5s{OT>DxJreUE3ok@$=h5?%O)ICD#%8%`8@$Gqaj2ob8h zt?Zazwo|EnywZs5e;%@)ZwJ!BUxDKf2&#ASp=(#+p3CT?y>w?l$YQR^-9vjfxAwLY zgYQ2ldOvz>;Mc+~$Rh4$1vWytcpFBG@+T;-X7lnmT_x!OHObX1x`3m{;o)jM?{{Uz z#HWA=G^3%sGXG5X^DbF+;OI~c}*cv)XP2T^&g#LwDsJ(TkAg)X@; 
zhSHbYRhcx9z9H6~nKak6!6rlZaCr8;`@}O`8H4o~yXf=I)KE8TdfblgIHk-=M$;<2wmSw>I5}mRF27KQTk1n`)BMqJ|BGEQloGz? z(XSHgCsMT;E^s?ss{(NXO9!M$%(Tp-2_SHK1}lI&E;hVLbp|u!Bp~}m-v0_^MGw9* z1fpPlKc(V2#lNGUh%79tY?!(Xgw!J^9}-8CPe+VoIA(X;fnW4<0M^)fcEW5cB5tDT zq#ww#0GHYq*f#_xlDlx< z)0Ow9Gkhmfo}TNHGxQ$?*q>hzv#kS+vrCqIT}hgkJGlg;yI10sT}`6%fal_%-C#^C z0Puk*j#ru%o^sltcA@|SUyl~nXr_nsX&o$3(bk=Qs&ie9fQCAww_$W{c-7T9WnJFO zRd4C$)II{Ly`?2YPXn|fHbL2=2!U;%8s{T13u>!S;ERb(c8%^F{Rm{${ssY2R<3ChLc2Bk(n3iV__Ey?K$V;uf{LEa`>9mC^AOE2a?5Je^yHkMjme1&XAtj_3d*xuE zEDiwe!*A&F{GA--cOShcUxKn`V#OYd%|{mcD8@dmKM-QOYyz?|7Bo!k7&qyz2fc|v zjWEy4Z$J&3QnJl~ZzXy;rj&+>b52CgHZ{!vTnudY^6gz4A)TY#4!nC^+nw@0oEz#F ztpZh0Qs}x7?JDLHX=Zb^W{G3h*ViVs1aAJT`O~XTyxe7F$E}SDt+qD7?9r~Z<~F0j zh;Y1tgl@oe>Aa+Ruhal>fwPYIAtB*iW6jK$W@KsIVDGso8`>6HwEhQ{zx0v@QO0M=&|ed9ws%$A`a5Om1$GmM-rIUhAo}V9|PV> zUJ94B8Fr2>h8=R?%fr#@^}TSDOZT3>u-E zBa?o7G1G-wNF{{h_()}_pQpUEth}PX^W0~hd481QVk^z!!h2r3S58Snz+lcvbszXV zoa12crv#bPB82+k#sPtYCKT!uAhU1I9E`p9EAXx+3%+^lIt4;=7j#|I0aCe#$6Y-_ zeTIEWlX6#f?{11I)B)PLzYz$g64yex?=@@+T~ww7T@cl{aS+jA+_w$RgvQ)F#Kc+K zJs@ib^`LAk{3TmA3#^;_VW6}XXzToO8HcIQaE(8c?`j0@{}*6ejjxG=*kmQ|^}lRA z<^YDWidmn<7NC|lEGbK%s+jlp>w?v!jKNi_zSu&5^0Hk40g&bk(wD#ouNe7ti!w;y z-Pj}4%p9a2+{Q}prq?N91EJ?-z=UC8-2+1C${rAB$8#n?7hZc{=6abOL=NTz_U_&r zhuWo7l=$)t#3G9^+JPD-Gfe=_B!F7Q!}zn z1q?8QLez9~_5N_SN%G%yPug4eJ4@>~QYzN==52wlOUv`Z?9V&*|DAV^u*aT~(@;D? 
zR&5~d*%tfj&myC zHKG!S2{MKUlk8JSOkU<6=iPJ5ppUi;{3u3mf~pm)|LA zq;_LEMoo6R=aHTQ#piXccJa#%z(f=lw;%Z-6-dc%HWxH(7B_peMtZ3s3RdU3WF+j2 zj(^#s=Dk{l`vB|#!FYuF2s{kULQIZ*suTU9{j<&Zhz>vZW%?ex2cx>$`&fDgb^fkI zaLJ&ow&7Rm`340IsJ(IK&hGWXKEAk=i2caRvDZsT7;aC50x*wf?GYPW9a^q$rCdmnIGZTNv{Q<#=B=kfRr}C;f zg=j&G7N7`Gt~t*v<$JaRA{6F!gToeYwxs^Ce$VUxb~}aranxeG>{8A2DPne$P0dxe zcSPZUj-}(80n6be7AOquyyVTHOx>JYk+oC*O-Z4;Sp)VyR2|4G zbC$g&U>z@+XxShzG=q@aJ;HHXiYExPU`o)HqmR5)Sf-1I_(K96wf!)fYO5&}HVYaj zh4*rj9MnIRSW(VG^a+aO`RtfSzoB;ByYC;hH~NJoshH{F#0r(@cgMd_L!9MxFUeF{ zEg}zk>5<&m7VHgOq0u}E1pi0TWzqJ+d}=fwpSs%*I@_dH-16%riYUoTlnN}h@%`6Uog?W^5hxvy%p`hHo=h{^6FL2 zXjhp7F|xvsF}yfAez*Y_TbGSB;CJuIqUDx9Ka=onobgedCy&4RqW{KO0eWOP$jIpX zS?fQTl(UCPaH$`1KX;;e)9~*9SbhE8wN~Q4Ik3q>3=dy02$<`)>sEF}XR|7n12rJe zb9GGG?fVvOPF0~%Qvi7yJiF6I(F6RdVpV2VRey(u1y~b-7of_-h<^*<+Cz?B0HkV_XI=+`a^xlvUBHYz-HJ&5u_tYTUaOgxZ24sPX7 zIaZ6`DBp+k)d=Z53G7zbrz&$v3cfJ2PH{+}MAbfJQe=y;_mk-Bwga|!C?w9}7&Qm` znCY6oKo=wBmGr}LgMi-B_ULV&UohX(z^9YCDMBY*A_-UMuyg>f?Efcp)+2ps#|Pz3 z@<7r#e$=UN-aF7veq`*eD2LZr($o5V@M#q8N}%mlcRWLc(k(Eo!x|c~pCRX8^193BZBC7{B|RKB8a1)? 
zQRIojU(9n`QozCrmWp zKvcw53)o^qM?LlUMQlmV9=|z1?F}FP0q95nw>8hMr`|O)E0J~Nz6laDJ3sM=xa$x` zG}?P^R_*V4+2kEw#!FE40AujemqsS!_O}B=x*x}!2bPVwn_<2)%7y~-GC6jH$A{Rn zuN4a&thiq9Z%g#yu#qLXsB;hqiAy`BHg|Cit%N#HvCSdP`v31*sp;wB4S#rk$5qQb z1vQUrjXb6^E6ZjN<}6b;%N3U~PR#_DkSp;0lL^l7nEa39A<)>_bO_^o@tE!Mn|$)RoB;gN7IN1{b2Tc<aXvp`u$`zUW1$ z^zJ2-VVZn=k2A8!IwP1HGXryZqyKtmKF3OOGS#7i8x7MZ=Ql>ZWG`G#;hMc$*o`21jrDH!=S+K zWQ4GS`BZ5}yNs&`*iZ@vOFk@sgVZoQEL89!61`7(cfu{SHBNbWX z9bSd|?hR&@M~-MpIN(S+niLA-HMf(lt2Ngcjncl~184@JFqT`3&#V;H;Yq!{e>g=) z7kYyZ|G9GK~LU4|18$d(WJ9h)M+GnAdqvpLT#N9uT!% zx^r9~GxeE^D?zvzK(i)XO0R?<%>$lKJX$j=n+g=Az`vAMkiGvP_bTqL89tYrLfp-< z8$5B=elrIQps>Gq4~cJ@XPSay9QAr*r z&B>kvyT-Y0vnel%X5?65q?>CI6-}+AMH#>3(H6zXzDi zdM!xJ$_M}>;f|r{W+{LzAafl7FMWX?cKbqI*jrX$q{{^>;&O{@W%0=Ip-7^@_pa`7 zC1-&cb|*l?MjB1VYh|5gZ9sK^g=(A@IwNd^$GE{}!zEC{*UaNv{cXhJSpL>_{cZbO zN(K$`^AFm@7m~doL#-?uP9UF%iDAkoy9ppT|8I*=)YEQyoZ^3D^c31jFDl{wIZ(oT zi(G74j(v0)l;!=tdNvO2QGiid;f^mC!~#EDU`=hguQ$PkIi{4B30_$o3?{`4pfMr; zSeODS?0<-AGaCE=rM6dr1Z(zOeT6PT$^lcinZ?zHkcCcI+dupGzW`~rum6zo8Wl8J zRUn7Deg=ZLJXU8R*4<7zjY@zDnDvSA}lLysZLzY_SV2j*wd60=;-9k6uhnKcH$)L3}z z)&25e=+?}|Q8cPYlm&vgNq*v?tcUTcE|hhNtcmHTVP!Yh*>Te|^M_|_#rT9!gIt({ zo&;Ekpw=>D%*r8F&PQ`qTRVsUh~_!U58eSskKDU$oiJwQ*eJM_QDidUyyk7Pl@{>H&pxyOIJT-A9MsMjDTu!B;x8QNNo$Y)ypkYaaGz28$Tdyk6fGDMo@{ zY_eIhPgHl;MolsQDkMXz2YSw4C$Xy^aIZSncb^jA5tn<(UvT}FTktGduZM1FeTcac zfv7@c(zh|^^g10>iq~ro>4N@EGh+qY9=Ei|4c5Qj?DZj8iPct;!=DVR&)-V$c@>{w zmhX5idzbk#&^`~EQE02e-@n10I+4O9sqEb!I1jBv(y8tqa!|HjH5?mcIlE&Ux^Y$R zzSNEzqJOjS-#7ODKx6>+@}EoQw|z<1$bQ-4_?zg4S#d8ktYbYzrrl>xbNJ6=@#ldS z_qPRp^#8au7Qgcyy!8fss#|_`2|cM5?^ly~6AFK1q4xK=gWt0^Cii?vd;glh^9ui6 zciW@ka!(EPaMZ9)ihOVlUXx2Edw1b-&$0jgYBQ(6eJSYlEukiqq{|ogUJbO_ zawN3M^pM`l^vI~26^rnEZ}QUJmv@;twtxOQ@WI1@!;gWg`nR2(X~ngn$M3JFNltDq z8FSUNcBaC$xdsrA$9XxmQ^3btIMz!eNT{^!Jk%*yTWhYm8&iNu`{_SbpX)mnzq43* zl9$p~vumT4g2Tl5${;_K+~3Y2SB?H#gP5w%>JRpFG~zN!k;7j}QMpODAWJ> z;s~n$wf1g~9>3#v6qpjR>9}{7Mu@ujvn$Ci?Vh@JsQ5;g_OQQ${vChlrDyMd<^n2n 
zm<&9w#|B*7BC1-a%IVz~>ECI@Jwh{z^m}P~^x~pDkPT%Y!;YBD6naGdOJw#UsQdu$ zJDBc#RupC1D+Dr%lE}}>&muap>S1Ke>#PI%A?9`()wJfHOm)Vs?qI&d^Syem(h07O z#hU*3hzmxi^@<#4a&N%(T5jPD7E?4Rx<_RdR)@&ePkbISrF^3yTQ=YhMl0XViZz&~ zzCspsg3{$lv0%}i{fIUJyv2$T!e(W$_NP4rwXQkw!@2A-2^WIa8MA3g4>{@JA`&NtDp#6E!wDK9aBKz zHdkeo#^&6TdGZ1rLkvK&o)mN`+5%+D{f4vKB>FUXN-fZG8Jd*UeyBxk&C1^{>RZBX zp(p9_FTysQCYZ{+9<-6>{t0RSC1Rt`yR;3MbRhh4je*DDDVS5rvpt`4$5t5b!zq}0 zfvH@Z(0pl~?_&Z2WLJ0TmfvXwiIJcj1J|~ewZBs}vt&CnHa{J&ly$j-$qiPBFPUP8 z1%0NsVte^!WPC2oj-J)t2|8$(`_8{NgzgEWjr5SK1!0jqI^(7SC<0NziJ5ECh)RJ! zJ2~{zQE%Mh4(4syiB@8bWmZ&-sa@<%{Zb>TElf%llzJI$Bd-W&b*ix@piU^AVQuqV>VR1_^v9SEkG=l7T?0dbW5095S7CPKC{O8b6F7_K zpzpqv84_HiXa>vPT0Jy7!QiRQFAf*V_Z!9CLst-qCYrF17Cgj!B9m%c3woTr_@aFA zB+UEiu4ltHFOJY;vQ}H~W4zt9CNp4Zl-7wG3pT#fSVW9h6QA12X7?)%;*sEdPC;c| zEu0XDdxw*xZ-2(%ewXkO=Tso2a>xbmO5$k3^g@;^hFg?J2J_!v((hVJy0R@FsJe+Sg8sJ(gmd0 zE|_%lowR>KZGSXJa+_4~)<09$Y2v)qG#~nV=X#o%g5^D6w*!o7;1I9<(mPpp;?q4^ z9i>s{@cE7c?}@kcSXa23ZFQ3xV7U4hgZ%=1YGL!D;jRX4hm9qS?R_*r<)D(9f4G0j z(zCWqIP7{BgoNd@niV#_qbrD#GJZ{XL$kB{E033JtmY%DeqxTOl(BuIW|g)6J3kp# zJ-mrlCc)r$Q$w6Cw%9V&ui_JjuhQTGYy5~t%EUL;V9||s91lF31BAoLOW$Ti_?CBs z3XiJJdJTUhk47w!*d1SD2Az9I-!#;zi>(TljEoC>C*rY-1>8#S??50gqO>uEk?j3& z!+F86vNDTPmF*%<`seI3e4&Ia6?rS;{*?DX{NoEH!o9RTh%}XTl0xPcEUbDw4%I%t zB+xm7(nx79cyWNu9gJT7Zsl_^PhBrtfkS@&ZHf!G2n{$RQs?HAHu`fa!kJ#y1X{}u z_`^2L*Dx+KvOnQ^(Y9~7s1Dq?!TWQ(DkdTy8aX#=M9yKeGAJ2GM`&{_fE-rTR}v<_ZA+K)PE^L zQO&nAgmi|{{SM(nh$Xq33#*h{JF~Osf*u5=VnuDFCuEPQ7n)wp#6Lp6;1@@UFIr1G zk_G9BTSe3O)HV(zbi88nEV&94$7uTgI6NRWvIyjdYId^89zkD>J1plfjjX?E3Ko|lUI zmt~qP+o0}FZJWIqyCKm-LpW0-6M%Otn_VH8_R$tGdeXZr|9uU>ysSo}%{Q^zAlGyd zA+jPgUYYTe{II^bD+Tq$)6~oa?xwnXF`tg_DvF+C>9PTC0-T65=lD|=qU4xUVKV0Y zZF>RNq^G29;`g=W_ifA7?LY6ZRt~1$=hBAR+hmwN%s#-!)el&7CUg6k4nAz9;V_ z(Ib~bjuc0nop6jMLD65nlTk`WFhCFZQy#;vwnNI}9+ z-2kL}(Wo59@k@`swRQ9mOP{6sX%fDLw1Oj9@cC0yoQ*weETWEXQ{=T`?AGfN2_elg z6VCBJ_|DwJnC!`yVls_&9b!?O*Jp3FVRT~LAKz{%fE!y)ymy3~Ip}|O#J&U`g{vdX zkBImx8%K=sY=z0S*&0C;A?nIn4#^$2%G9YxG3(K@yK+#4(WnR?2Ug*=F^=M`I-Mx4 
z>#5J7%~s3b?(K4uMmXJyI;vs)Obp&*8R!@emQRNGCMll$0P|a$Y0_4yQpGMkucFx8 zYSyk6cs!ZHdzrx(E2}cRQ9>IL40);OKg2%U%CLN_E5 zwvkoMyV!(&umV%c3!KB2D1U$(v@wR01SnSss#STg@b$5hs zE)ecg458SSKFJN2e~3jFFy(_G=}4P(;;xGwr&|qt?UIzC-mjKkj)C3e&nwOM)z2|P z?2Q~34pn|?V-LNskQdP4t3@D$F#YmMG7}2Slg{(?Xlrj2%~f-%miCPH)FavoSgp2% zypY??>Zik99l43tg34pTAwC+v7EX{+(q&#TksvG&G~+2699 zNZ*PHnNsXjv)+s81xC!31CMPJ6sqP^n}A7y3I8naR>EE?#;~hOiH{ zy@xtY76#~vMLYLm1W&trw-Kk;k402*ablLaw;K5TxLu)yHF~9~QFUjK{}r_AuWa;K z6{~0JZS3g??)%zK#g1b5bCAGRh2{;n2wW*Wt68eSxMQOWWMDBC4L6f8K1$L;w_Hyq zvCy|e6}8BVJDXl8=bhdqjUuQ1eDqMiv&;6q_4(_`^V1>0SzrY70_pn6d$UnGC3dc1 zlj7))?jN1<*q`THY& z`Hi>e(E>Sj@3G#W>41B>aWr;*cb>;JJi?Ck<G)Vcr zjx4400ztsW^L+bGVEu3>uutZ<$hgWiumkD@D3W`^9-5=a<0vpqgBV`OPc?w$`~FET z(;lw_u%a?CG<(Sex1%~i| zHEyi}GiyM43Av|q>tMN;34n#DsSKBO)Y$p=rSJ1 zyys>*qojkk(Ld|R9`MN*`L4ri-Ww!aLT6~DVzcR+@Kz@Ad-}9_qNx#iFuMxbK8ZHd zyRUAu9bpXx)n8FP=EWEDr7G8b8Y)0GidozU6K4z>fvpx0|@oZ zip2Pkw9QAl2>@iu9_HA!`Rj?;NP6O6v+r1iZ_l+TnWn9j5H}eq`|7x7_uCA!t8%%A zoOcNH=q5Em0f;>-@QXvtkqmjJx+Vo%Z8A_Q{k$ZpGhH>sSV7IKl+`VgB$M^J+Bs~ctM>P-qJtKmKBIwc8iK+QLw2UWzYPr>{K zq-T$zHB`#sxwYOJqr+&p{;$&A;@W#2zpr2?3{qsXcYAaqt2F7*O+Wz=Dcq1uR#GUh z4Y*wz`JUxS4XDNw(nOo{%!`02$Pp(!SJ3m>Ix*{Q3!JgS*W<%_vEjI;qMt5i)_=xV zmO@bQ&BN$t^i9~v{G}g{uP*!u!5C$YA$(zN61Ol$Znl&q3XGWg!z@3$jl**+)r=aM zl+@mA+x+K+?(%k$#MpJk-Zegst};V|{#3ZNfbDvAh@VoEDXeOeEzt9`8Y?Y9wKT)Jb8hkU(YWnn`KGFhG;RL`;K(J(s%O zI>|a?>qQ7jU&JUA?nH8@LE7W-;(oe|VluCE)a`4-=*I1KE8mj!JqkjI*mC)g z7A?X6gdv=#u4oBxi`xRP5hSqach5c6C7I${8^VlEWDWEVNpo;D`TDQYSK35tg4Ezx zhJ{$kM`LZ6^ZTiY)pmG@J>ITb?Ki&8S0j^VG1qb2xslipidlv4zQ#o*A_?mEb+D2` z&6OIkPri1yGv1-ZjafC?JWdXlsFehiTl81XY_IHeMvyiU=4UYMYFN0czq zW=_)gcChrpA+c`Gx5eUV^`h(r9)7kSbQ1V1UTlq;=U@pYtsn0oweBp!bs2|uF&au! 
zF&xOZ+mk@2rM7p)I|&3z+VLc{C9k$ zv6}ZbQ$ku5AYJ;gB?b3bIpm5ON|+ojrt?Fo=^_0ciPpM2;P(de2EWl(^IA z`{N3CQ(djDM@mS0mXqOl>0~^yRM30pJUXm^R4Bq{&n9a3iB(%Mee%zoCJXz9FdzK4 z;KdQ5#E#Xs1}5N4zxp+So^(_0-g?>qB6rX>e2erf?-y%YUZd`~`Df{Zan>$m&5mcKgF`TK!>Zsf3k1Z4EgR^5-e}n`t>KdH^1+HgO`sc{?72f_HF{fZ@PD3Th z@4ESP*_ByY0L~XV);!r(CI}f6W>0^abUzOV~L3ZKZVzz%Ny2Wx?I8BzrCqyq;%sOLY6M@tWrt^FN5cH` zCmYsjE^-ssL*~hFUCnpL%*#mnZJ04t6clhcfOf#+LXt?n=!<#R0e2m6$EfYp_G7zk ztJ9tetK~6p_Y(tM;0i{F}?t7Tcu`*<3*Q&>x z#|XM%;NzQR*5=i@{%D@qyO=LH_zzZPKAA{o*}*M=p9l$d%$}^$OPI-DOwjsD19HWZ zb*G2~zM46t>yEE#q*;Om!_eG+8w_)U%R9$DsB^ri!v0SRR`(ZpwJQo?B01HyeT`zlvb<`mxav^5j-d z#iyi5kS^YF-Gk#*iSq4s_~ZZL1~sK+??w7Q2~K-=>m5ze>;8S!?AB4?)vh- z=Y3h8K-wv_N>vRh8~k0>i(iu`YJ5VC`(YB~t`ip;HVm}_X%K+~ewFyn`^}<({mpuT ztSV=-62>1$m-ZZ+W_%vydc6Nw7lvl(ibXwd@$qdx9{a|-97i}~TtzPIH-as9>|?mE zTQpbsKw{NvN_#gGkR$AHAzuRVLU5c+8_(j69BOWIQmqzHeaB)hm^1E|a()W&RFLVq zycF`4{;&RjOy9*EEv!4PD-C7-d1HYjCfc$#OH~3Xa}6iUB7|9ZR$Mz2`KaY+c%%n~ z4lSgW!fiz4POEXt&Tk0#E^2O-@II4pr;tnV#wYIO zsAO0QpNn+t_wW4Md}HqiL8%?gjtmG{z5 zzSsPM^RsjZji0&CJB^O3XrGzUYhHNXse)q{O3;3HF-Y``{Dj!h&6i777F62@>mOk) z;w%qezR}}h`86$cZ~fOFPf!Iu3!Kqhm*88*Gs>daSpN`r^jA_p3_w_pl|O3U_%+|# z@Mmo#cY1}V{TCC%p$Q?M!X**ffeyG6FG14zVUGkY%hgEn#JpFflPvr%}f?|SD$8mU7xOvF;ooDGzH!aaIFG@2Xp2r-@W)>g7uMm$!@tU~g z+%4UFB~Rlobr&Ap>)1SZV^pP?NXLzLHMdQ7;4l=($^vKf1I_M3I}ItzHI(5mCmbifexSbA7QfNHLc@IbPCk4QEHDR3OMkL-@M{{9 zdL%F`5FI#LAn%za%NWmOO=4*0rj|b6hvEGhO5Le!$Krf5-2vkKB*>D`*2thyeBSuR zgZ&Tt;Z!+|QfQ5pj!>@Z-jxs5j23tA-mX;c9~=mR^I+wn^2?6t4ysg^G~w+GD?hT5 zQYt^eqYW!8#7-DlxE!xozP&x|XEP3wpD%Z!3Kwvew~?P`5(yz3XOYq>r%S0!d3aX+ zUnKgF*ugr6x!-(AjdXRJoPN(E_xr{fXT6pw@{kS3@r9Jkl8b>mYNX4}^=HV+lK15t z__el)=0}{1zBgL7OugCr+~5cN^#hdi^+|FLIq>~)&iSL}Pd_)I+FeycKZwaM?0;gu^O|u?l`H&{nP%(PW@A+NawVI3 z^hvN=;$;m(xk@IO@RiwRi{PRSqLP}|14Wh-T7=-Sbk5Y?>#CG z=|#Ay%ez8uDpR)+qH6zOquOXeGx_pXv^@)z=kfA}TPwk7vOc5|j`!l(0L*)W2u{R# zgaV7(Cy~jgAEH&wgod55gC3;F-KNuhIU#EgGo>U^y$Bzt&>1ZQ_Bow4AHp4S5IQI# zVOMeFkpAJOX3!024;pM~0+_Am4=2Ml`=lEJnW!BGshpU_BiBzf9K%v+DCz+gz>YJS 
zKL$o^PHMJQBL!aR%~E}VI2U5k16N3cFd;HeIY=bx8Ilu}c5TFw_47Jd(i4HJh$)OB z4W9*_LK@oIG*z+rxc*6NxH-#)X*_kkinQ|GvOml_*a~&Ta~V_D0o&TsaFuX(hK+`3 zYb9ESOSP|O^P17qyD%@ItGvH3IW(-;g}S3@kT~j``{ne98xJSHcMI2T4V#goDxo2} zMxLi|63s^qfippGhhSt?^9##jvC=x36%Z9_v*ys`O7r<+F+q%_Ps#~aM=Kew9WpA} z4DZAT=FxOOF3Nuy+jh&3sVI{-7Q)0{KQ#Ey~lX~K@pEgAgU z-H4)Li2Or*gg{M{S*G$_VgHer8uQ%8j-4i@vT))E_mvEcuQNayI9J6h##n)AQFKz| zkGCx5RX0Z63!By0_^3oJlr?MT>&+51zYXoNOpPNT8=_w{3_&79mBt4rNF?hu%#W>| zMQ&Fw)eJ#*yb#_v-?FP~s1?<=U(AX{zr&;HQWO$GVGR{hf)(F51dFdyhnY~ziC$y# zW+a(hxo}}q0ZvyRl#&+lNtqd|NiQes;4m~?lc^IpAH;KLq^f;qsW1^4*AOZ za1nds1L>Oq#^(#=cV^0qOny~gxDY)CS&)D5&PRXy!I!T<2y-DWyvP_+z*~yRNYQvb z_UBG$PbuYpVs|g%3CPX7u!dn7l69DK^5(e8?S1wS?FPJ}$O9RrQ%d|CIpmk=^s|<& z86MbKdN!rAwjzpG*6WBqL*B-_HCz}>B;wmdKeYeeDUss!^E1U2aH=7?A@;BrhO)C; zg9v>4cr!*Ld|!-+hu8)3oNg#~5m;st;=jpFdQy5BYzM5lG~e4gBtxZQ^k?dtn?d^4**a7UDPP1qbl zFtQ9)vrW=JK_pG>K1o`Hw}smOxP3F3fcPMsWS)fYwvg&FfjvKbtNx|Ks^W3^pz#}0 zq0f%-Z_Taq{idPG6nf*38bu|!^v6jZJG*HDADZTA6u34wEBDfIO5i$stfPSQUP!6I5Bh0niWZ!^j{l($Ajd#`8$euy` zjQb}+|74fSP{WK{^NS&>2tz~aAs(a7p?9c~-yVr{^?|Smd^3adkFArB%d9&j@R#D7 zW*d%qJ{EW?xqKq_#+KxQxmVHPF{4tr#Bfni1VSQo=Fhh5m#0EvgLFf_#Hr?(AJDf< zpDTC&ZYCfI6b&U-UeK>hoaeCpCtI7n!#3c`y#$~ zyoX!u9-LmJ`H2}K8@=-=kY>#jwL2ZzR`&&QcKc00dQkX)E-$EC^F9sE1 zN>ta`ULR-a4&Yh7|8+BYBtiPZ*F)dZE*Zpcwg)JE9GYgjZQzWu^x0O3iqdyRO{Gv{ zPhUG(hce*_Gs3f%oZN!TzPM!NPame|eaxHv4}XOy?mJ{1xIVwy*fi`>FTK!K4-pIql{caqLTUiU5&`YBBIvG z*MtuzQ*(yEkPkCiS=13%oW(0=myLeLX%ojZ=n#UEyTVM0Q0LAHBWg^`lgi>lB5ebK zPAwBj8X+-_h#s6iyf{(dJIKhRK1C%IEAkaN1YOK@h=B3bXGUn~&UteNrjTkGq~o^2 zKkh49zr{JE)8?pUVWp&ACf7(8+n*SxmR0x{q1kDpRthmu$i5+v}3Oq#n9ib zG@l=WEy(_n@#BAj4P0ssg*54{jM-NebZcion{fpgxN*<>%8x&KQ?1HtR)%fK5^|v2}lN?eQJU)%| z?1=EI4-=+-dD=GC=G0Hp5WVTZCaRTMjFESAmiR~wlLPfLxT)mcf%8u`XqJ{U4|?lx ztb-1&VD*HGznl4L8m}SZ1#A~7hOG+Xu~hbm1ty=Iwyx+-=q~#bzrjzgYAU+IId@E8s6;Yd5GI3mDLOGf3&@^<9hOnouS`rL>Vl{HTF@#YZ?A zpZshVd-{*LW&A3ijgy|CDo=S6>zRMWRGs-viPwlr70f|ftH@|R8m5-$#qTL z6nY!^dfi^{^p3*HO?vG65IAwz`VrR0CwU=!Sj|1-pMR!O8h27%0{moP+2gNPke|#_ 
zTmqC&bSPE%dp}o1dZa47Uwu;FP`z;b+Qw$(cdR5>^ci_*eVEn;!9G3B^Q*Ep25p2K zig4aAy2FdS{}LqW&}j3yRG}~lqFPD_22Tb76wG-LrGN3bFFjEfKIN}qnP$4Z0Rk3P z1&aEOp{V4QZ;FcTih?XE#Y{cKM3%?dK!OvtOABX+Zdj0vwMp{o9@a*!2h%vLm^P?SxfO)0hex0m^(X! zlZJCdEGNw(?DR8zw(8LBHSS=pp-qO^Gc|98Uo_<0zM^lD_s}Ez;wA6Xbe_YCnGVAC zEBK8Qmqv^Dn?5tVxHle`9L~IvkdlEZ3VcNE;1ht_Q;*&`FfCl4kLQGi zw<_+!@o%_zQ&QoE3G392&ta8jqO&84&7sY!Y#qO8k^`AlMJ_ye<{*@TbQu~MT|2y9 zbH({K75Sz8z{(%0jS2xjlcz^3Q@u}Y9_IvW{hBkB*+>-kkB<6uks|&~}A1V@Xq00f`9rzE6y%Q)@_H**3+gTO& zw_n9Rvr3MQ*!Z2B{9{9Cjb3(qaq0R5M8Ez+DQ`_&O64VlUqg*fV%U!;kCaNUFR|N~ zF2hAb#85#uDJ=f2Kv8qhkj3VV>v|bm?7J1x4@pz6vz1u5kf%D9?Pa?o^i3)EoAz5g zs$zcJVAJvRAQh$~Iu`43ZU!d0WkISVK^QIkUWvXJ*Ufn!h|wGyj?oGP3a~%+HUL+IU zlG&bp`r+vJ()wWNy>j}`t}b^fE>VOlzGXwoHsi2d0gSsn*+HKtEX(I)_o=Imfr<)k zrS&be*o7Qsx!aOQx^K#hL*czYHHAc7-hp){U0YKlwoKeU-}F&zH6V;am)heJF9pqG znY9&9#7~8jx|7LXM-Id9DQxr(d*u*z0fH zif-}O%R_F8MS)`VM9+y%{jbWBrAoR%wLQe!KeBp=-lrPFlmNa-RcMZ@{gF^IYV-NH zqR?70Ip#R%h&LwbcdQT@)}^9Qv=wIf@c*Iez2m9;-}rHk%wvSCtTI9lLiXxilohG$ zO&K91^Wa8glMynGNF?)Ek#+1;ob1g(9OH28gJb+|eSY8X-RA+_ z+e)YC)kgRuf;Z4#+w+X;93Oi2U1)3#8)w-@|Lj6bH0>3k=harT;Udn{7k)Cqg~B$1 zE~e!?=n}gH%0<3|W3e!5{aXj*>R`W|+W=;%GG9Ogm{H`-wR2(fFEd`?`ozO(c{49__)rr8vo}N zU&ZO;SN1zM)PH=wTYv|=0$9&FZf!c_sE#y}?Y2?#p~xArG_wg!>lwXzFBrFXM9kjZ zctMG=cPwp6lnar8W>(3Y^g+YB zA;(GlQnO^x$eQQln6YaY@$d*iejfdb@YG*iI;Wv@z_TxA+{$4f*L)Yzw|aY&WAV$& zv4eJ+1Kr+Rb2ZyXN(P%e^DY890X$fsh6y`j#Uio6k$+ z(|P50>&5MJBb9^;7bEJ zfDW`*ccZ+9+Aqb^;bWw3%-D6l#}!Q`siH6BD+-!UC@bv!Aqda%UZNk1&SYlfx3Tm6 zxS)XTDYZTY>q>Oh>_mJ%k2}dPwp(-MW+A^yZV~?Q>+od%Kd$QIxfRN=pIt-MilpTc zDqG{9#wAYzZ?;r^a?g}org{YS3XS`}sgd>Qp8GykYxOdXCe6H%>J%B+_cO*YJRwK5LVvucQLwd6sWDE5J!8#cN`=>Ppk*y?=Is&v*FPF~VviuP$s-i*KL8Fw#)C{pwo7tI|6q$>cG@uzg!Jp9G(Hf`?^Lm^&L z2XmD5tn#@#OjTiqmxD>5YhpiF$M1HaAVgPI&t9jB(x0#bR|2U%#iVRElVuUh*B@s> zxH~2)Yr1<^m!5v~{dqQuIJoc6zV1M^E4>ETc4`pzfcyW0uzzQKR7~dGT4|Tn1F=c zq8H5+y4Z(T>rmaL%9x?q(-7^V+r82~6u?;!NoCS*(+Cw?sQ-t<{CiqK)aD(BLTOF1 z?ynd{dj4h{NG*IRI1@8mr@nNhx%(gFq%ziq8bZAl+zXO2VlOZ=>S7A9)~vh4>6{%d 
zs3`1}=;7~*w$EYe5jInEP28$yI##7SkRsD2m>@}YDD5NL2xhaUAYJiEDn}(`)ir=cuV`D~0l5H!D|xLUt^9mAV3%TCbt6 zeN-_1=?^Qe(yNG`o1^%v19i$qGQ{s!PIK<=;b#CcLLnmO!uM!49FPXzZwOr@k`V2w zb$`~7`p7$UCHIg5;}nOStT;wT@hylC`*S9vM|SdQ<91@O=!zniL%J<-BZ~q9TxeOd zUzg0__?1RqPK!st<2X{(wjQwpiKzAZbRyyt zE#P(!5j~rY_q6J|Nh;*s3uutolQ0;_8u19qViT|4wr*Wm`V7=F?EPJjJHZgulgnD8agZ3%RJ! zf!yz?jndbf<2_`qB;RuFK8gX$M4pa$kxxMpj)ij%Y4gmjtM!hJi*|o$R~bQilnH?z zf=*P+U}@C^xiwZv#I%g7Y*m@?gbOtE`v8|f9HH;_uWqR;^*M+6D$>Y>mTz|Ct!j2> z4NbTv_*L(-f+6$&Qi@|D>`3&lDqP?)}d95Ok`H-@P zT*;C$E-(QrR=RbnR$2S?-RF>;5IjkV;+R`Z4uibH(e3{k-mlB)CP@=tP7eX*%JH-h z)I>AVaiu>-v3_a)p=J&nv>UTxW?})FxX#fR$$xLZY}-_DBuoFC%F$s~+BA>fOw+z3M-dRN}YO6)Q_|ij& z^qWoA-$eUR+MHuM&H^R-2rHnjO>L!`!$|C~#AF~+S?`Ab0K&1dLqp$=bA#7APK}DF zSQ;v*Kau?^Wpdg3R$54x=%g1-U1iG1=T5c_|M;UUY@WJ2$)^wryDY4z(wH0F36;F` zHcSow3x#I3QsRKN_)+LcDU!m~XxIHpA#vTa6@5&BNa{26ax*IXCeWwx(-*oWHSnM4 zlpQ{@hv{H{2Gu%8s+=DD!qA%@?4dy=2b?kLoLqIhlJ-tHhTipGbfhvV&?dtia!#^{ zyp&Id{0kF>tf#=V78Ohe{zz>KnSZeU_|2XA)K~7RIdO++^LNK4|4UTz?pu0$0+}oY zJsCY;Hq|FKkDwl4ocA-rvx%W{;V#-Xg64)zn?z2VDp_Uz^ry<&cYT$RU!HTs6eQp8 zL~QolW0bX)QH>-0Emq?9*2FUG?{4}*+RjmeQP(U{2b*oIKgMR>8n3NOmqxNyG?JzF zf-||)2~8X);*D@5bE)EPr^tKujdtq=Sv`CE&r73@qd9=(shI`=(aE6%7UmXiB^y~% zsOx&fsYPiNxA+u=QhjP+ltFDy97`6|xg+M_Bu{ZDiQ*Oewg$Y=2F?R?1UpPZ#>C<_ zo^U1RIsu+IcY^HSxkb?VCi$_H{(0z>`;xO%_@%EH$|&@VM_VD%GaO0?-;8hi+es+m zYHgH30&i~V{KXXwalwM^9{1Qw`B>NM{C65lL(sgqHvB{`%OIH6rkn%dzg7|aSV zuyJcArdrZHdBaPEdYp59gliKDUBVTR?Gl zw&6S*AsN|+KI?{ZUuHq}HmAnmlivmeHrB*MME#Y10&)0I0ap>XoI$-v% z0%tvSY>Kl(`Xbu{Qgat2W~Io>&v~}QS$obYaV|vYiT98=|9j z)mAk9pX0mf53OhXML9L2i35VMVc|hJx~~IKyC#8;YArPa5t;rdw{exD99Twf-q0F~ z$E18-Ca1-$65e54S#1wm%`O9h3H{8Nz93jy;u1g1%8sPYsFIcyS|fR|?-YJ=j7DOP z_1t0f5`Q_`k&XsuV;im3-Tggoy!B@`upY+&qSxH(Nh^W#4=Rkn#S<6$(D_jDt!?Xt zF2Z6251{e;)MiEMaU*>4)6pmd6xp^io>AYUqH*ag**0OWNy+w0= zqMNF>l+?*2s1%97oFt6OQ+U?~aZtlfroE6#iZVig_?X9iA z6NU&DC*-W?WPsZ%7Cxs!i%YdT3#ue@VY7>zSvao1q6Ty4(Wg<956fPEV-B7{k9t@+ z2PSU0$jfbI^JinH5gyaEl^rPtL9QT2V!yw>LWoQ5FQ7M9LXOydx0_wh_dKT`s(db( 
z&um8U>MkWw)08j2aKyv8)mv)o{t-U0J}$p4?h=4~Hd~D^%?mJoB;<gi?PSG{ zRo|CC$7IvDzH3?hK|O*w$BmtrfslmN;?6kf^$NV2!wQ>C^UKq~OBR$|8dcBk8yG>v z`2!`!kLo4t&vVR>-kJ6Ib@OpE%gp`Wf!}I@%wy*~3f2#5Pu^^~_~oNK)4Idr?%D@w z9uNQ{U%b+yYZ4NDeH`9RL;w^g>`;XEiwCe3(aqBu<8FUnMOYiVSEUT`^W!}tISw<( z8LFB>aoyhq7Wp|t-*~S5JV=^BG-UL5u=e4;kchlx!JL9uv<@rVj(rh&4x6$5)`aQn zqdB+nF~v^w2! zjh6p^%4VkCHdQeCdO1m^VfU_HE=#F!08_Kwaq-gP@7LD!n@>%;1A z!972^L-B3mW{HiK;9`HwZ^0Rasjt2LvEJ@vlS*u(-7-?^_b!%H?YgDg(9 zUwos5KiRT=OZ(mI(UJL7ltF|h(_@Ig)`_$$)hQIA2K zsa*1>ZEHf6VfHVEY{1_l9f|J9QRV$DIrcmXn6n7oI4`Od-G@<*1n_UsfqB{JUp z&iEYecs2vxxjd%EQ?{eBLc`)*E2dq&VQh7GorAcy@D0dBqH}UomCnU^KOo&J(MGcz zAwJ10eu;yWq7<8b0F6&Kf8tqZg}qpI-rPkRxRmYn!_V=zgagj&N}BX9VxX$xSm}m} zR3E^x*q?p+4Vla(N7j)#XWhuq0!=`d|S^m+)&t7#g>V)H1>^Ii+KrlIoTs+ca zFc+Q~e>EkR9Tx8dDcX$9I6^$gUb(P8jLwQ&ioN zd69dqmSrL5Ey0W?3Rw0jTk2bP?N0>lTfZp;8C6!yeEX4p$DhQ=<0oek0tyhO!eJ%rN;}WO^vw7>IG)Gzgi?$*tnPi0x72B1pBNb$?-@EJFS~WUCHeT>XYjZ zJguJHFV@?()=pU<;K&84ZE!Vc-CQOnPt1>1n-CbMW~0mtTOn-to{O<+Kj9&O@Ib z<%fcP}9xWQ;x$(Qx~xK{eWiyCqMefI?a z{??q{&1vbm7Cp829i{u~1qgpE-;4c2?U*AWP%^-8*#FfUouoODbjTJNpZHXBHP1s| z;zXuLwn`{@V@=*Mj)t{0^?8t3`COu~c-?jJ{?9^en@80{*r{=q$Bguz-No-(<=Xzo|9LG|)?v9fP7DJcJWbGn+n&S+sjNMvHsDXnAYso$w}4yfr6NlzEO z^b|EcNAl7`eWy5tQGF5HR+@CTG>$hRD1A><8|=O0U|VAbNydERj&H81wHvUvMT zGai8!6J2sqhj+Et9%_Pt_5B4lrw@SM!QZGHvry4&$(!SZo3X#_kArd-=+A3NBBg-; z@>nC_KBNvxr?q541vu%Sb2*Y$9kX&8Tl5Px_E2W$PV&{?5TBcN3x`_%H-~-ac_apQ zU@%X|>Kz)w@Y;4sYQ=S(d7jEnxjgL5W$9(9aOhiqwI}=Frpe7wP!)+y0?_dx6x*zE zh`kRItiY9N{E21Lxr1JJS*x`2W-#Nd=`($c+&T~RZQSl_1D7i{bFfT=w9jVmk8f(=eO-D_;;?&#M{di{?@e9RLZov z9><%3)7mW^?gla9)74CX(Td6( ze^tGXC$2A^TwF)Nk&M|qLIm~ghtZY7p(IKB6aEKtpifNrE~J_fSW%=Tp?dPf=1o|) zDet+mc3MzF75bq?<$7wl zSLCh11oP_zl+Z*Ur!|mUBw=)j=EVP@J(ayXkEl>QH4L$K$aYMvZ&GXgjB~%{v&la^ z=ZFf>&~17{Lr^!*-BM`OM~;~YNU-SwNqCU!1+FAvQz*f8P1#-9Cr$0yt$7R1+2gyW z5peN@Rf5zGcEFUi^@onqME@vlo3+lDcc+1Ysh<7vDrFuFovlK0AQOP~*8z2E1!goJV_SIQ=q;{0{@2c2U+F;S3$EHR9|4a;*K zNP}z^9)seJ(jWbkK)$|Q+aTR76b`j}=A9^QHA%vRzifwyh!c)C1BLfIXz#^Fp%IHp 
zg9rLrdt}M7t(Mz8aVlkeaYnTG+mXnNjCyULjSX{|lwY0Rz5=KS>WGVyyF2~D*VG-+4tqan+)Ge1$Ep0;|R z&ki-xmc1>8o_z7pSA>$LruJK&$8J- zsRSe$iw6HU*ZXYFoM(M+YG0D{P_~N|N2>Gg7WmO_d|1!!!75(Vuaqy2#DNh}&DOKw z|9iC>0&u-|8HGNcN38UK0htc}mCe(NDC$PDa4%et~h82FvC)6H8r+iu1>)s0;`3 z>D}vTHFcVe@{5L*C(O<=pqNJHoXvh7mjR0N4tqU^%FThk)e=ay*^g4}lX--boc!tg zBz}7x$EH%`i+_+7pVo7+X|iC(bh2!0&Sk%T;vqH+^uE=Xf`bPiAnV`qxrXK5J6xs> zLago?&+j_+?_9;kjWC0Ijp3bp?|nAozaHa;iXo5Z2O*m8QZYtgF4Cxq0@!WTa1NiX z_PglshQ|pKNy9fMD@oqe-GRFgQeMjr0m#nXFiWXHH@3wqrMKdlj_D_^>a=s3sTbQ= zCqil1e?cG1LrbL03#{ec8jDsOp(lS)1e>;-Dl_?qv7UHC7rw{dxlHs4n;M7L#;7nq z?o2883mK9Dd=|HzMBAxD0R}-ugTWq@J2WHxpXQjJQIy8Bm5e%3kFc3ARHh3=Docd4WXw#SGO2 z{A?Jm&X0{@UF9w+BM0U{hIByP2UJ`bKx|_6Yj>XGQ9HlSZ$@fP20fBVi~A-#v=w{e zMj59)ked!)335GqT&_{Md(U8&EZDR?gMqo>WjKPY1pyf)>SJT4V!&=0j@#dosKLh&@i#EhD z0C^mEH;-9vPV@7r4BSQ8n%r&h4gEz$i5?p)H~H`FyK0bhP7X6Q`|+PZMS&8P{2yoF z>S0iOOVt;f_b*Fqr7?Ka9Y^uGZu-!rT7gQPj~n}8-7A}_nD{$WPBz=&r+#g0)pnEc zfmv8v`Rl)_I3-K92l(3pj1WMFHy?@}8UI;mkh&_yWUk zC_`7t4+w#}!1JRFl_%#wuEo*(6C&Em}HWt@CzY%&nEwh z2-~`;m|oFg-#2#cBc>N9$s39h$xgEm^2eHZNqD>`8T1Fnn8q`KpdUG?8Ifb1lK6#~ zrmiEYI)G`CVFfnnW&eJeKfZ`u{^lQ$Y0LSzMr$085^knqBLPJ*$xbCia4lYnDOPf& z#vAjU(ko6aVMX9MSt#B26T;VUhG^yg4!qw!Ql>P4RDF6xIvr4#Fm!P9cy|A(yONs~ zMDw4=yYE5S1d-~d`zP{6Kb(hGiHxVj?z>y8FPj+5>F>-moaRZy#wW)wZ1TT z@Jut}W8${n)YpPU-!fyX(r0BsA=b1uWJM3b_yVA{TKGeX3K+@Me2>#Rl-5^T>Rde! 
z-O4l*^?7?OdTC3|_dIPOw&WLfL*sCF880l+`j%~~2>tG#=sm7DBUNU0GS69oFPtK| zxMv==y()P+bzN_Y4E2OOk>J(7PSuQ5Ce!L-IaZ)M6u#(U_^fAcjfu)WO>9SZ&9eaS zS;0!7(m%PIA;JCW)-ic?9zYX5HRDVqY19&*tks*cLv>RVC&*h#dOzRp`R{`_GGFBl z*#-;7FQ(C8I?|)6vqV@ZB=4^L^=HstSu~NfQDNiTWpVo9A=)TsLYVF6?T3(Z+M4~P z-7aSeJH!4@**e>itzVa3;P4iyKu+j>#!CE9~ZPV_uUTvV}YjMT6RMr zQ+?5s&~dpxvMh)8T8jaZ)Uq(=-^zCh1|&gI)?O={qIy#gQ2Qdplr;{T>^Sb1YAi&K zuWYkd=c`RTkMjLpW?e2f7hZnue$sx>S4Dwj_Sr(Ze+=UBxaRR>#6bXvcT#EZg5cZq zTaqH_A)vqy&pyLA7d5-sO-RuY0;!s~oTe1F&Kn!mdJe6aYSu&aU+sx4?It&!fczFP zQYbTYHL5keFx^qN-xh(i>~|5`?7L5KTpFgbd`-E2a=ppPjss3`kn`p?+mJ7O8mf|g zU@n4R7iX%XsO5 zHFVwQs-v9v6Ni;)e-)>WbF28`!HaxRF@na_S3471Xn@1i#CJ#xW{hN^TC;uJ^(_4z z?@l7~B_Fp~X4h5qPlTTHn3Z&T{;vYE^MwQ}zu^eHCgNE!si;~u(xKRlr!*y(RjcWi z{+{~L`3*Z0Xlh=6%N;tP_Lstoaoq^VKvZ7zA_y$Y5y01^$A|3z9E@DgxHJyC? z(g904C!naXW`IJ6Br2T|J;0t3Z~UKe>QqPjuD!d^nIfdk;N~WIyrN}{;5I1UjEt+! zP2OcvO!HTQDkf(NQCoMWtV2u%mAbZ=QUbIF zO-@hpvV1bGz`!4_{+#F1Uf3P)%a1@s57-pSzT-pdqxm`F71#(KM{Wd-3-U<^&Utt4 zbbGSh2*B43-Qys>Uo)X<-5OjR$&4E2#@h^Pqt*_oDgHfKKr*_L9ot2=5zk)n@o!|W z-!=`|lY?5Z;4rTol_azA4<4_Qr2sbNgsvO4_iIKL@esx&Gjp2OAM^EK?}izIkLy!4 z4{HU%lzQ>yW{shy(Y0P;698q!uau$3m?aeE@(r^zWc49gmnMCB7N~x;V3`M zB?Yt=mRwc=6j%!iJ@{hs@%~6yP6|!$)yCA$({MGkTUsSUMbu!dN$52ZW9aF54E&gS3$ue5#VFb7N|^WFGi#%n>`=grVG{Q@uvkp>J#mX`z&@H6KC;>V zt)^L%(09QFY8cYW8>}G>Vq5p$V%eT^v{R;iZ0b8uKy{ed*k6$#`B+kxU8fYv;e5j| za+GAV3y_%HeTZAG5pJdystbJcii~~dCBl8NCMcZTPg6<*@KK`Gu`qi(5}HnfbC;^s zqB%Bs-veheFIDEtcf01n=b?CDC9m+Or=oHW8;_I1dDF)_O+igqpjRgoR_?Ln8qC7~ zQ9ui3gH~!Zg1XCnXTP`CC-($rfoa4kk_@Pw9s91?tki8zYc9b;-3ii5u$>?3;z*PyiR^n>uZ^ml#8{cqeE*0lAkqiqsqmZO=3XBf3`hA1}* z3!b3|5+~tg0O{nQep$I|DEZ@vmIq#;r^vCJOg+RYmAX#P<{%C3OQ4x8Mi4{r6u@L% z_olva_oinfJmQ2fxftTEenND(!;7nS`{MLRw+heDQ`Qjp4ryJSP{AZ_N8@2!dxOs# zn>(%o2rJ-iQN@N?@&5wXQ)0*(>!!$5ou2szIM06Si3iE9JqzpeHopj4?sLxf!v#-t zer_-HBc`!6v8kVph`G|08Igbn-(=0X2;Yu>B$L%Ix1yubV?0^#WhYJuj$-<0dp)J1 z8ydpQv?s}X@JU$|Eas|ae4l)8%T$n=|D-=BGhvNK5Sv% zyf0BmN)~&=>$qnTZ4VEUpn;J#@eV~A!>SEpBxJv{fQ-5`^PTaOY@kk1`+p=|b%8`# 
z3>(TFvf+43?S|lMRgAWcUE8j9yJ$WUwxy-DsJc?KPdX~3>R@MCiqiiP?<^$PFdcHr zMaxhFym5_yA6!?IU_iabWYPv*xRKP0R=?)s%Tuu5_SaamTa=C7Iog+A>-(626^osb zuh0o(8LKJa%%cs-UiWj(Sf5FB)!C$#=1Cu9C-5$aaqA8g*_GSv%-a z1rHrkB7NUE+*m4qC|ivUiGOtqouwa7?t!Q<*%jJkrGW$?0?E~cK~ z1dm5%(`L%3n}mQSnF2M@?RkbH{h6@5w*6Z^$CQppXC z5zJOBI?Jv#E#*@`Th?&l!%Emwo*5k@B#cY&MJPq8e1s{cGA{TYFy?%e0q#~>ls+?RnrgIkwsEbnc+3IMO>*{IWhcs@So-cf=Z7Pk`*xQ^e0Kw{7m4*QH>98JtfqQ7`*{9W>KV~fpa8mZ2#$L|_KNw(Nztuo zXo$W}rg5z$Rkr5qM<=AHGE@Sin-?->_TLdz46C_;cNV{#m3T5N7sf@E-0neF00Z3F zOGg`XShR=AR?9NlmO89JR&~R#DZ1ED){ zZ`L6!Js!T)COrm`*zX%uSIB1Wq2SQmbe7(H}M@VV zL!H}h+x^@Bpv5-LuX()0?PElbla0O&8%*ndbGGEj1aq-mb7gM2uvchqx^nOrcCzvB zAjk+JA?_ri@*B^y31zhnsNBl&HeqX^)2<`tgw`8!HnHvUWY3#_9voS#)@?UhShynn zj2v_Gr%1srVyKSsK%wzRhPi~qU9)N=-gJdnsk}u7lQje*i>tG*CS&AVpGc&=o^-H3 zO8U8@n0Dox@yD4gPSnDk+>Eo&fjUZVC9joVQ!lUa2hWaaxcm?-)yT(!#c-+3;Z{Ht;rQO4*dh8hcb@_6qXeWmlq zGKfXy3m@?Jyz{NXzJk5X&lNo+J(yX?*|e`fJgBxPF=sb_mnga4gDVF!wKXl=%odLZ zM^rxjz1u7hz3*y@YrcFu3)I+av6sV~1KoE6Z_mHE_B?hp1-29Q>6b&1Ng{Wg>6G9Kgt| zz=Xh<&@=JYBPlhK<|+W2pdtK?X2^`{idW~hAW;hUfG3VC8KR%39x4rtI5B$ zyuP!_XL;~_1S&{C_V|Gj9_(VR-3JQXJa*|3bOHCI{H*mh)DjmP53`7FSyM4F$=ld2#e zaMTAlBH)w}=5PL(fedZa+ms*IzC(ih?*NkbrV-CI4mxz9_*MWIaofZN6N<#UIr#H&G1W z_`X*#_FFOLCY07^fZ>N`blD*q@A(t5%q$zfV9qOfhfXH5!l; zBS$*Up~Jdxq{p(%)HUUrYqT?-+ga1&RtJ|GaJ#+iKzwWxmiDI)P-4ZH84V!%6^1|6 z_wCmE>kGf&d$_=V(iy77T3n!Tk_Bwdefh(;_8gV^s_=<)Yx~#o2hqohHTD{b7x2G& zK$U~GW~*@8>q4IA3Xdvjx8yJeHoR-qPGJAIcGJR*$L8*~T3NY1<<8`KU;AIH_Lv*L z{40YONrlUo!%A%1^!OngULmKC26(h85M!0z?Eg}MCWyQe^C?H5@|G3w-(n*4gvc5m zJEMEudS}cZrKCq}31<#0ti7_ufO$Z^KlWt5D`u)3XW_*d&w&-_%1p4v6qAd7Y4Y#g z9jErAF*p8~GZ!0sy-*2-$V?%6>QKAm%^oVO8>C=faQw8wvEdFemyhq38pVruUVUh% za&)ofDd&_l%>Ir<&zfDxmY3 z{Hbb8%1Xw#5LEv5kgq)kS)A}_UhpCxV`Pk$WLjTgDU8)fD zJMHPCJIRQR7Qym$H4Hm*hAw4j+Sy`Aica%8r7vjB(tTATf_%wcT$pPoeRDZm%(qiP z5g^F6$6od&+w2o74^oUz@AUA5760Dpml=Z?zgJGQ6(m@v)jvG1FZiJHkTX~+zsq@= z5LS?02$1x_Feg5fF~psg-HtjVbxyQw+UTT zqpEpuLbPd*P~z=8Sl_Mx+4BDtVufnI!-BC{HO z74O0Rxp&XG#9dw-@rcUDa83+z);vnYL#a7+Y?l zAR51tq1 
zrcF7f0_!AJbkRcPNWlW_BLntT%+r#hYilP}tzhHbRGxZ|6OnLJsb{%_mJH~ zLbRXQAFq=Ia>Od}JVBi#`>|!6^X9miRgNK1{f~1NrD>KDQnF1jQk;q3 zupic*ZuSxLx-HNfU(zH9>=#2o%F)FA3950DsSY~lwmJT$@V~^?i@4^&MNJ%I)DFfT zb)Bk*#wp+OHr&G5^0*s6(6DGmO}SQhf4q~FAY^)X#LDF3{8P+q^74Wk0tgd>jI_fg zH)flzn|nA^!%1}zSF0quST>L3&hn$Mv0yz=CY$U;a5>W-2xn`}Y}-Z6AwPPdPf^TE z`u`i$^Hunz<^01J-%NA&wsvMw^zI!}`&qgsU3q^~Q4fx^xHY@dKl;LX8lsirl~51)v`+2c3#bn!wy6(7 zTW!}xBy3$o4bmulx<;Mr9^UHC4R!HCKh5kbLq(vz*_ThT>G1eL)BbE(3z+M0L0n03 zZ<~!DI3S8ILbRwWZBx4$5iC{3?+f5OY{@{=1wl8UpV?%Gz^2K=hO}#6Us%CK5A)-L zChpg@4{A84g?I5K&wBh+>R4-_m#;^iv3B?4c@kO3zcU<4)Qw4TnD6FBl)V=Z-Dt(d zY~%`f@@>5i-7Rx0ve&R0^(a^1F>h^QS1LJtf%)Eol11f>BaAF?>muVk4~;5FfCu&w z*HStsAbVtK+6w(#uGTM2eqseZCAVZw>YF-{%|pZxAY!W|uJsL)8`RqSXL_c%F- zTf)6}I@K%5&XVpV+{5#;eJFnp7Ee{m%4gU&XF4g)3#l9tsP zAZb~AkPrHuMXiAu^$@i_@>ksdL{cxO^({}-4yT&BdcTlwsnJK2!p8T?k7lk4-ZDRG~YY9U#^ z$|(9v&6K*Gqq57Sv5;6&=;sxJDa=1pGc~1BrnSCvtT`k|k;e<1Tv0H_TzPk92uWDF zWkZg041RcJ=-&w*>o=(+Vvz--`>wn&piMIy9+GSCBC_$hUl`Gk&{Ww8X4CE1T)$|! z=?Zy6gTwP-k|D$rU+K!QopT7QXSq4#ur;B_4+Efj#^-7@lqSanj#_@ojE^Z+a8~+T z-`QDEac+Weh4gt>qhz#V?e8B+82Jg8SB~8MJ*V{J#nq)Q7$Tg7PVshIZ7e`)jrR+g z(a~*>0eMg==p>UQFhWWyXlnJ`4}2*shR5BN>A&H(t?7xqS=d#>pr^pr+f7t0_DlXJ zE1FX7q;d-t8)_8FPjgl;`76+WtW&s}{8!^c!iI{v0oNn9hwZ@>cdYPNkPgMr|ACp7Z^+EpeRBE=~VYin$3wE%#kx>j@tx|qopY_Z=#n%KZnr6x3_9JIS z--IYtUJ?bTc-4LLc#MWJfWfxQ;`uHZ7|ubUgCk+;=syYEZ%-^gHirrNzVu17fsUJ# zPv%~;+>!X_e4J@LBULFL&rx~=$ME6?>9Zra!w^s2a|i;v)bsl{xC@+Vd3$PH`r!Y0 zy3AU^KUMhqmMak$Y7#q>z1>0unc1R0y$JzL^57~#0#h;O^te`+_6=yq z?vcu_`hoMOz@@q9^j9S@*yNpEPniOHj{gpkom&{ z49T164r#Xd*Gj;2Xk9@&e2CovQ(-%r@1f6M;SX<(Wz|+l50D}=$ItQ-bFN6yh|Yex zx1StN*nMndM+3@5u{A-csXVS-X`lU3GhgcdiAGGOG9YuQZdN`CUSQ!*ewGE}m2 zjz8vi_kvFg!|#%9i+@*BpgfCspwe?QWP|if4Nl+|G&PiQJ^65lBKn@k53M_wdaqsk zFYKDlUCB`PTQ+`uq~ux^jH$+_PveByNdzf-lQ{(VuU#Y4}}{fLJcqKuU4jaec;QoL5fCvQTiOr27{FTd3r6mz``pk z>_+Ylrn~$3=AqnCe5&4x2+e2dlU_!kQ|$ZVu(8G4={TF1k~>U~>5}b%)A&7TLz?aD z+dwgLGJDM*RWa+G_F5Z9r8(6KWCsVBHtr*8LZ#yU_GI+@Quu#{&h8X 
zByc*AeL!ax_1U~h)AhD;j^?l@$!aIG;sz5(xrHrlhxfKJertFwUR|(Q!s)ATkYtGMw;^4r$) z8@zj?uv_z&f-9?vWLb)h$ zM)&5fHXMi8M;8F!WN$B9V3aY&bn!x3$pZV|hckPmP&*IFAGbF$y9J@nhI4-d&9vYv zbAqpY2mCg6a>2_3uNcAIn``to=9q9AB3Jn)X&r@L{;}7#K2xq>tNOkMoMM_1M~frR zo?DRj3TY+G`LfxxLpF+=Zay(*7y3oliD>#dnpH?*O07(3fkulC6il; zlrs9;#nnD83Y1$s2O^hu85ov0FrU?l0WyiXYibfgk_7}v*@T(!*IqP}+d89T8HM?! zL(K(MHD@J0t>o_^5pV&K(hR4pQIjjXpIN7R7$a=glFeLm@4JQpI{vqcf_{y*uhu)G z%%dJ6$ok3YudC!$i6eWxKEy0&iXkeXF!KR(_`-Q{VKStv<2EK1Fft8&eITlT7w7{gS zD~tyGUgzL$vLV1Fi^~ZYX=MYOKD``b}s3xxRaegRCTx<)A~C==%AT<7MOY;m9GMz zR{*c#PQR3!(O1g_trD>{13fUzgU*I+-gm0GmPJLOXp)NS!Dsr0;{;}uL}+vLeqsgW zyu1V>LGK9gkygip=I z#dpBx5OBU!&vSU);d@1RgC`8F6UQa%KAp)D&0`@+=J##i|o`CaN_}eV$aY*-n+J851HK3FVF3M zA35!uU&;fGigKyuZw`mmIt6@0)V))0!TN`N{%jn(vUc|7qh8<1I~-dUJZ*g6s@@`4 z+I<>kV@+|Psi#|q@8C8 z!-(9}Rn(;g!s5iR@wD!R#!ptGI#5?RHr)A?uKNjz=?Jv+zcP9fntE$1NXErFjL0%9 z@{OeNdCdmJ3D}q?u%D2^5#aVQWwWBb^#kgBz10ee)BlZpNmH8anX7yvK@q6@X`@vTWW>n~IDUQf|MB%E@KAqWHP+2Br?8_izEJKS;au^r!#S^iDF^(-5WFimCcgK|{Kn<{4pPqVx+fGJ31iyXs<$V280 zDF@P{h`3L7=}Nilq;i1`%r19nN$jDWy2|f6VjSOQlT-I79tS;+;OZLx@`^#fK+-&RvH9x7l-pZ zCK<_P-TG1R`OCZ?#OJ2p{Anz#CiWOmyj%=$6!R1+%@Np`j1ihm<(II(h9rU;V4!hy zu1`l4B(dLJoMPlV(|q`G%c?aJ8w{ zz5(KyOuSOG*K#ZDw+WIn%F9sL$H#ySCZVu!lMuD2jG zpYKmS1$HBpO-WuDSYH_)P1vRhFf8y4ZY3&=g=H_*2RaVzGdm8sKLTkQF==+a!n1un zjpv-W0ORM*vBCRuwDPqL)r!k!zv1R2=qZ+IZce+X@!TMMkgtNj)xWItxvwo|Y{JKq z7sQ7i?|932EYU7W=UxK^{n$+y@VI&c$@#W82KOV7E_S3+pabxH0& z2zG&Wue{%eM{@2RqA#Y|p4Q{M=nNosgYPIbaaTM{X}cZ2w=#U?5eoIkt}JQk8S9=X zHu`972NC3G;K;b}=35yuG{VR+;-2DmUB-E3{eigYTgrkRN35?w2DJ8^sRjGlsdj~d znRJg38-viMD*T*AKoln!j77%2|Ro)9cGIL@Gshef`^lr_Nv~+&efU3 z7VI#(;o`kSj?jSF>J3c#1)>#F>~M@F(t>fHYHahE$RAte{(Y}17RJzE9lM~y+v9i- zx0VZ20s5=fy0qxsEwkzK$1M2rh`<%thp+oF;qgfqe(r>PbDdq2ex82p#j4pJZBAiM z&MY{(yv&f~i3y1b%1PmS7bY|uK)r3{!`Je3{i8}7W4^V3neuW_L5WkE9u;9HDTe^P z#Q7}rmz_z~d#M$#7p;-&?5d&AwN!&d(G$6|s(PK7O&@MNSF^cgO1-$51>DcpU?Lex zoh+7){$yeE$Uq&hK)*}5EoJr9a$#U}9>I)|y%avcM+pI4Fpp)kb_ym}VPi~|p{IY4 
zr1COD;Q6I+#9dS1lWU;k6PvUGdk+boHXl?F5B=w0F(mqmX3FKT*E;tyCePQhY=vT= z^+YKpuHAcDQ`WH>cWIl30l?M^GMhHssA!}NPb|vmo&g=54p-j3qK`ti)6R=iZFRQM z;j{fL+*tY%hTM;OoE}kdXVrg3e&vuoIV=DkPoRYyTXZ+?Gd|y} zvoWsDkNae>+i|I8_~$b70`?Uc*HtmRu!XpAh9;$$-OPwiprBd0*5*1v;OjOLSMoiJ zN;`$G)O~smIBfnN6@Psv&Yi>axncf3w=@sp zgwEDQXzf!(aozSD&b}bB?v9{BMidTT+OVcm$Zthk-OFFPNDUt7_0>imq%)<6_1#16 zc(Pdym6caCH|u9~YE*Q_R_bwa;5NGt-7v48O)fw^z%I|c`=}tv#3$S$Mf&kQF-{%h zD$6J9>O#YQGz;pupbim(+C|m?5lLQDmfvF>QWIvA%Df3i}+R^RX8O#xEpC#&@FuS)TMk~LJ)L?VkV`&jz7i)bpLRkD zKC3M%7Mx0nKdY79=&H@Jug=#kireo@$k4mJa@Yl%lE?5C#hoeM1SE97RXc%!pu^+5 z&$a4D3$k)__U20S(wH9DezIwurHX^+H(j}7Ee{VN9`CymTo^I3ec?5<3{h>Io#)cB znL?8ill=PxUvVfW$k?;%!&=7`!mUB7BtgQ%M_0aqhZ**oPTLnBL?7sj_e0a6Y@=LY z>;aj@vE2`w;sFjf^oPipG8uITpOu@KU4CNvXLp5^MPRatGyv|?PQvS~TAa!ZJ<{2e z2@&7AB{kpv@iUD3x2U$6^6V&LD3*{K3$@U+G1zk$eI85H%FW8z$-BWm(kitQpv|~5 z`^_OW>O12G8|iZyCMx6{%W>~X;5-$Txr$y+;b-929gI754{34r9EMv}-ana_{ib{lo#LAW$ZmNG zr3v8alf?SVV5)D+!s@zEqJoX%bFqkW*uEXYw?3WLO$77ps0J&nrOG|rD0j89z;{+X z2k5sfEjYStr=j+_Wf12kKrz?|V^|j6u|Fl!Zj&~)QA~G*PMp`DJd88B@NJ17qbD57 zOpm^YJ0;AxPdGXVoFwyA93E~QJm?gfVl zh#MCYiB8|xh$iQZpQl*hf3uhZ4gmSW)o;THO_|@h*BHl>4kQxh&Zt^Ej1NW_GHw(` znndDn66RiTM72@Q28jEZ7lbQd;yksb=iVcNFA&Cb{i0}-#TUqGjFxX=^c%EKS!=v= zq^>C0Z-Ag>jdAjIOk4*jrvr=&JZa1H4{BA>T+Jnd z*Y5*&;C#BgJ^QiYjT*tM&8=XapfnUikJDs>#I}uV zaL(wE8ARC#qiTP7`H#8#+qq%HCc+sOg|oGehnvX>Ofm!HowF{-i!r47Dl_Z|iwDattyQuCSd%DyjGqjIJWU;aXe z10>3??sIALb(M>pn<+ysM*@&V>t=hq`Qj?lsn_vt2zA6*3^hYA#kc6xsn~=o!M*fk z@Wh^*s8g#;b{6otH{_{mIM5Pbara;tKrW}WMQTvauC&6Y;8)b-q{r{`toBtP8ZZ2o z8&^Qy%@UpZcErsT^8^;iKxS<^YmUGpXh5e!)m(PulX4_ie3*{FdL~Lz?$WR7n-!A~ z8^&>lQ7a4^RnQ3ShP5gslH)U~W_R-|g4oAXA8pqUBXFh54sWy)*>qW=*z|#RTG|q~ z1OC`9lR%@xIh|9p1NCM^sZwy%sxF?J;Z_lYdA-vW3>z{_JGqFVE*9S-N&#`Vpd0&! 
zY)SDeRb9O|JCRxi@xJdDpBzGf;e+E3V7zJq1c`*Qy9dvrlAy-|$I3-~x>?@T-+=Dp zTVc_li|#4E&eY>Bs0|G8!18i%%hQ|p9gNu)AGMXj>|TmG#deH`HcIxYBBz~&i9^YV zIR3pO=Bo-TvfliZ{W&CI7cG<01?lbR$1{TTb8qF`ZOz!DBAoxEWbG%k)yZ%uu6N)e zNXjMVdtBf`lF!Sq=3@2i(*?7o95gwiDmKhGzeuVj_ry9%fiY$EoF!DPaPs1t85)aG8g3nRO#Y+x>l3?-^{g9%@Rjo9?q#rl^PzDIx3qJNUbN=X7j!Wa@8m-NSQVT zbxLmMg}Z&dC}q9ASQc+dQTZ9criYXzm#5n`G&p`zrKuPH-L31*?qcZ6@iWHSwF^cF zLeZR3_^bVnM+OC5Q&ZIK+;e>Ul3`U(e5sd`?)53w0E3+-xW0fR7c)=#Z&i!DiY!?m zUnU=&UqyxZ^XYtgXwH);F?w}2Dt6nr=bkZ*-tXWC+1t!A%+37teF%TC4VJ@;)k4X? z7dpv$cW*}Rdu21O+;jLY>o~>M%N-kB1JN@$?`5Bu+z@6w82LM_AQ_eK`;4E~HEeSAb$=br1|tYB+so%cl; z#j2WQE0~N5(7aymaE-X(-}BQ#fbywI6XK$^IWW=%ju#6aqKit}*pxiA(N_J-`$~g7 z)P2`@>HB+S;)&0ZLE35SJ1=1Q{sJoC#y5vMJc&_juBL$ocbzdx$l1pqoZ34P;Ypuj zcLcsvxr*N^&s0s7>P7hLefF&!SPi^ZZ4w)y{e5pDwQRJeb&$J=iK$x0^L-WiP7KflJAjUT2j{3QnZsoJwmF{wU&E*)@1hRn zOX+-jN3hSTCi_h$WI@KgFNchP5$zv=3TdOuCCq}$%sUT7Y&Any)@EOS2S#P;u6tD2 z^ryuGOjXJfvLmj&-X3Fh{-40<7!s5`6bDo_j4zSf8R!2aJ1HsbK^e$oF~_INIEDi)3>Rgjvz!N(jkbULAq$q@70CsQ2Mt1Lg#Tn5#}=$Jlo z|Lp+ZmlWh0R(&B{()6HW{O$Gjx~SvAaqj%3NDVMM_}zw)VW%R-lOMs{lXmVp78K8@ zUs;TKY$W)bXmxMBjEblXbCoS<$bwCLRQUiF28K3WK!(-P-&oAI5rM+Pe0!dw`!oE#}mFv9iVY;)Eqjqo5j;UA93YM@qWIyuGY zckVC&q2b|$_onHdFIHZ;+RKZ4`@VpJp29<)PnylAq58PgD8>28kvgp+$WF=SU zW2itOxW^H8n7+$lA)q^b(IF5Mh={v%XbnzH`P_D^eUd;C70nNQRMD{i$7u8~Y?AEi z@Fad1>qe}^;+iU0sn&1gG<>a#9|y&+WF#7o?rX^VUTe;B593lH5LxWG|2Ws zSuG0_nkm@r9m)f$OuSLd zn5(}kYVBYRs)Q~QNbGrN&ilu?#;B{WB5bKwvS-7pwPk;L>lx$URjpLGChdHTc7X8+J&7+T83;^nkzy1=-6&m zZN_ZhN?w{SeLeEl9j3XA4SB$tF6f5V zj`RmobG1SY9CRJM=1pL=3Gmq9*w9#NZYlB^Ai!#29~X_Z`80%_)@WO%&z3OG7EL3M zFbMNjOUS8!+A#_Pu}GVtTMLrq z8UwVO)5mii52O^OR#cvk2R*t=(K~c4SswGqctLhC7OHB&fGezv>Ap+FsyE==`D9$^ zH6WKCtgvyxB2RC?+u(ApgR2?OCQcF<+BaC}kla$bZ{*0iiHB{&5AVzVF@jfKrqp>C zK_LM>;22L9S{KcQ9USd@3~iW06M2sU>Y5UCXjjPhMnbjQ`OS#-@okYMpJd{zula zQaTcJ`y6R}qu#Q7qfSkN`!=^YD)~9PqhhDx`5Bxdq7Ui>t>+c)m@QH|N(|Eac{P#N z5dJtXbbxiTeasC<+_$(VUTr)(sLl23Ocbp4ZX;kuPV1C;+_F1`oEreLqq6sogK4)j zL$2+pSxCUvp`3*W~AKly!t 
z&O*8tc|(&J{en~Xrp$SkS?z6o2GV034QQJb8RtV_Rr_4xovHH z*`rR*2|CP~64Pv#oihg>=CH(s8T#jxhQ!9KIXYyMHgSRHng7AOIUdvgk!O`bV9?P! z$qKM|MxK>V)*x%xn>pnvcG+qq?th_cntGY>Jy@;UXsCH7Mp+{Dp;vnA@mttzjA>p5 zbxh6|so3!AjQf2#x5_Chbbp9VTr1{!9Y|5r<=jWwbV#MKatyo#VUI+o-tSO%%N;xa z?YZKMW`*J7w#oHjlYOa0aU1oHwhMaRnfd*{kKpM2 z8QZIkCXC@>9lv?=a21I49LxLy8~Ot07;vw7`({3`fZ&(uK$p)MTwYSkJY&;21OeIv z^gPv%u1dvlPw={5-`wwHz7jzka|=$pE(KKqDIZ7YPjkb;&g0adZ3FIw;&CsJ{%{Sq zd_AG{y^{_w>Rv0qY85Mw6=Di;aBN?@NeH$qK$%R%H^tL3>Y9Iv7|Yfjh<()o40pE{ zMzT#_EPlP#lSA+&bK?BsMy(P(5@s;bEvDZV;{xQt3&xlnaI#$A7h{ihUc6ZObGHCT zR;7S|!XCc|(>~2WQdHB|zQcN=-9-<*g)LKX*wT(F1?YWS^Q(<;hH!Qp0txNIvo++& zFZ_9WXpg8_`ce*qR%F=wy%}rRO=?kv4BgQ@%cYFcMm9TX`Z(6V-6*pB+7wqrC#ouJ(1fJgH$%j#+$_5*{Hmyz<(! zmueM{<{6W|Gli?hGyr5tH&U=^d!oQzD^2ga&D6>C*L&7?2!41E%wU4@HC^K>;cSa{ zLu--D3v(43k_PV&cg{h@b>nwpQ^Ma9yt$@a+9A02nFrhwq~4{MXFq7slf_RdtTbTo z@9lBgz5P{xH%w!WBy)Qn>5=A-?3VmdzkTMxs+5%}u|#Xa&85*ppU>m8 zhQI&xrG$&8@5rZ#O?bD_e2R6)OcrQtg9%#Oa4yaDlvboC!W{8AmxgbCAK81vqebEl znWBH@MBtIF69-qvQez&pZUD?)7q_?y9z4iAV#akv!|S4w=(M@fTUW~V54le~+_%ROdSOIxd%#iL zw*i4?s+e4alT4Byg{0x_blPFNYq`o&kx94P>6%`&P>#P6!p>H70%s2jFrIyM&z9$| zdsI&4>Ie|Z9uon`I9X6@&&9gwk-51d2BPib`P}JM3xwQ5uR7-Bpvs?FPqi9@k#i|g zd~D95;7JWuuoE9~`c;ul5k|Lbz!omuu)KXX8FtrTaPi=oC9bNQ$w5syv|#hM7|*cx z1aX{!X%2T8&`q&(JNG5LGLI^ZVK=k^T-Yj-oq>92Fv+i)71IYt-Qg&Q!S;-!=9z$GPt@3%Beqm+i}z-ZH)!Gd4bpL>i)x$UO|a!;-tsjWg^D+$r#R)nk5UJ3@^;hzi&W>UJ_L4y#f7H$)bF*o(mrhJP}=eY?R8*{W^C zLYn_RuVMT%vh{apEUA)z%aie{+kF>Z>0AxH%HDe8qZy+%q5zOwQS8KXPziP9UJH5s zSu>#E(fxLnaK?ve%SQ8XP=KM!>&B15RsVDblxFf{nF>WPyJ%|!qB|*Uw2@tE+ZXR# z2?|r&p1eb`Yj3AYoAo70?)8IZH9()qvr5<%#ss1Zssh>XNI6w2e^m8 zW&+FCSSqel>T6Xr)-B}gdL`{hRnM?n-_75NX~>ciUiU*(!0bj%+<&%>4Uk8?_RHpy zqmSX!&2^mT1av#F`G~$KnHW;W3enww$*LH>CrxJ4I2(*MY&fr#0isI1uFd}Sk2m!p zioY`kcZ&~L6PTk&>~9$)N0L9aXc#9dFSrsFF@_^;L8{pqMHA^I88)Lbu;lbIni6Go zkEi4LA(W*4a4V#{yI8@CN9fPmCN7LU?^Vg;DO9fm2R8gCv_JR~jy(9N9orJ6sDCCA zGRpTtx@kdn0nx*8q1R8O8oI@wbtP|~shSGy`8e|i)6w{gkT@Fri(mvAc`%@exN<&U 
zLn0jMKCh3w3m5J<6GkY7xHp8~)Jv47XvH)0lH9ow$6(Ml5}wMCP41*V#` zJJ29kZF8*FrT)WzkI_dF++}x6VlSA2uOulqY;VltC_ueqTvV|#nxZrw78$+|8bIp;T#u|hriDAsip;{*nVWiv*HjCCM1tO07@i5=n zRN+mQg|;OUWFB1j;j#fqr`8YGN<|AziBO?of;^td{$yD${Desq0NaqQ2+T#txBJ%}{)~2qJmJ2qRoTPuvGKlhQGX2f|ScQ?5Gc~E816~tZSTpcZakN;@Jn>jFc ztkp$HMI8gCUyDzj9!R=z+Gp6pb1tx2$@Q+IP6fLB>z7Ot#pF6JY+<_B7xoc#ZU2f3 zzV8=wLD}T{k*18RVJFo=#;j^+0d|mYmQ9dhl zs@uL@;Yh1-E(nX+bQ7QMmFJMSM~X3I)3<#Az$Di3avzk9zDG$`@K+#y zkG_~LDD;QSzPNc|vviMrFc(I@Z5x{-)|Lu1E?rspa|GS~BPFuB8=HHj7e<;P_N z+*^YqM!{AR_Xko1x5M&uk)g{j7=eXt{R)2L6L}h6+KEw!>msq|d&n+|)&c_Qf?)>5 zSp|w#%XFcYL;M6=Puqf|^q2#hG?`TcsBD$u2h6*{wb{mxTQPlXT)C|`2(j|G*E(D9U z1|7kUcb~AJT)}Rqyw_1l5S#$RqxS=6DoG5;44i&GJM4)JX9&;v@X3%t0f^ph!66S4 z4AxSPC)w6-I5KQ7^8{nhI2M$NgX__iHOJRB3mc#YY@}?i^)q_P`7@4e4*2-SRRo#m z?+%s;w&&tJN9KV~hGE}UMFU9!<(N)lXjy~g_8;Z`Redt`y|$qZBr#FA;s&AjscS6m zPsPpfvEZjz+UVGal5Cf2!N(ZuPg%42 zLl&d#sJb{5KeZIzrSSYg*LMj@_UlaB6H&i+l#+tZyF$AJL&YD`*3ypH^lb_K-qBjX zTfCk?NINEmZsi1~&cTWNN~^;cOf7zdRzHiBWAuC0cDp&sikREfi+z#=suJsL8qZ-zApgPi zY3;tPz`{mHnA>Z`a?RfXkIWZtv4)B5ZEp6DS&b9`g=eQ(4nii8dUt50 z+_olC$C6dKly=uYubx*~`3iS692KymwGzK<2h|^s0fKPnm%5O5Lg1r(qh`;4N&Bk4 zH*g0nxsi8NZA@?|_1(Z_!r1u(@#e=v`NY(sfPu?LLp08ewM6wYxda|n#&w;XC*d45 z_Hgu%hba2}9CAw1`cN(Flp?!&wuv1DS_VJL;56=`=y>jB*F65A^%9kS`u&M9k~Bd| zSOTdXcq5emExzTuf`P(_EOy8Nf8oXo;;l^5@tp+)dlJiJ`D4ih`Nc=HzRX2c%NMcL z7n9xH$QEvWazyU?KRm{>4=%5rINhRG&M`O_jbP4Q!zVRAtTG>YJ+&^a5*k{7Zx*u7Y*aXj>WxuuFa&zemkb7ets>rg-xIN=9w3gb@`}^wBFQ;<# zfY~=ILQT&-MXTKk4rLYVT-_`%wegKsu-SNUrJPsSct>=aSCX~uNrScqSvsAX{w0s;C-u~%;#{%{B&tp~* zfS%2L%q4=mJUvF@Cy`#4Voj7&Dn$%-kZ75ZG0xV1@U$7)gA!oOh8^koQB<6|>ay=b zj-7X%7v6lixS;gw-ZCoo5%%&|RwVat-vL8e=}aNhFp1>mlBty&X_q;>_mEB(H9VL@ zvgCLD1bK0oq^t(TYP(~#bf<`G*`)yNVF^VH)Nm#&!<*j0Y?(_vr~ly3=4W|YL9zP| z;-UNtDs^;516~*$;ogEb(MS1*5XkL%&B#KAjlPA>ztf(r;R<)o!Wbm2JF&0n9$;e< z>!%$^zgF$fPg(i}F_GGkVSlwB>|)h&3R;@w&p2*#z$lODh%)e(1_ELl0i1CxTY-5m ztYXIln;sl@NvQMXvR!>L{)BjTi_TZTJcX~J_{B#R{Ps>-uGkxNzKnnTeJ>J}3QLGG0N 
zZ@>eAq{p|DG77QQu^HV9DRFnCSTx49X0DI@c+U}kT6W=cD+;E*n!etWQK7_=l^)ej z%E@gC);UYEJq!qI+inhzR@6^AL_zEJHQ);sUyi2f*u z+T2e!Y9eJzcR-My5jhl{E;P@9gpppfxSL-X$r}|6K37%hqx|V4<9X; znwKGu}Q4=)ueaYUXlh=cpd0mV2~Q0Mokzr7;~m-7wlXDDsvz&#Os{!~;%ol^`v zU{{|Rf`9;&IKa6DIHGT7YbC2g3l5L7C}Ch@JviAoVF)|Faj^<**g0^5UG{g&Knh=i zv^_-2xn2dDGwC1-!c3^iO*aB1Eii%?CrP8|wV1{^A*!Odps3UI<%C-fnwXAQf<38( zEK^*1aw93*?Gbv$n0r4pY(oRCil5NmMiPE$40%#qX%zRN^I`XqGzxO%oTz$7R>czO z9*HAugE(bTgLy4TYv~HRKEYfJ-YUeyO?@?I#MAzy)oBJHxOlL3hfNou5ath!pbKAFfVLq`^SNkd=Smh3;o zBrksiJIvbB>03zIlwlLew}%6VcKrbD8b4@ZB6s5|3C zQ|(#&Q?0qs8EElOPtjjJ0vGSK0@JsfNIBCT znn)?nWPyt~C#>hC0MjYM8lR-};dWAf($eA+a=6kRnn@|s3)@LW&N!Wj9AxH2F4J$1 z9EdOWmCs$w9}eL6@p$Gmc&mPf?BLpPgvK-9zXCr6&KEKse%-#;{{h8f*%y_2YMtSV z7f%-H{_AU`WcXB=LT3d283L%$AXVuF?W7hYr+;DI70IT=@$_WeJldsxo6x;$l_qS7&Ubjy^z{hAh6Skb$&)%B zFmPf2NU) zK%Q5A_oUC|b)CXiO?>0KziVly4`Q#Kn&XLV_>1Tl(FpvANxf_JV5CUY>j#|R%G*EC z!8g1k;|3I$PuZm42D?ZL?)qv%covgdKxx6#4ZcV5M=nL;BRBSsm7K#dw;Wh7@2-*j zqi&~QN&`>iG~7mdNve&RVn_h&cjnx(!1N)`I2bH6QqhwI(?A1mPJ1a~tPh+1y54`# zQ-bSOr|KhK6mZBs&NCb#vPFW~e0(@aF231CpolO87XZGbN15FD3FaqW0kl32-A=00 zTj}q+NQGnr)~VQE-9MJvGIG%Rd~|%1iK&_0@?i|220@PiCmj;H#&Ve?)6=V$|7)y8 zc2`?ZU8D$WBQvRiPQCqEcAY;3D$t{I2ZjaVfJ1|9-${A>-!Eg@6Wu@Ip-@pR4aw-x z{Wi8BxvNbDH4#)c$-}bbL4fAszx{D150z4!8Z5u@rNT%Q|D+nlEqKrX7yl%oH0ZRO z_g}~f4&dVBU%;ll28fVLo-B~`k*mvp5Z^C(PC@iWufG46hLcCM-u$CIz_;q(m9VL* z2dsXaJ{()4+>^be*`#Bm(v9HiFxd4t2u|R)5#JD^n9QR!ZVWS~ho&dpS=T|#fs#+| z-oNXL7kFGEGZB7=V@`;Xp{JvgPElhdYou^SRR2e$>)=XMUlc{;RMaZR3!{JOl|h^? zK&PaeHIZHbzKkm^9;?yXSS~g*UIZ^bNA#Yw85fm>(^>KrZrsAGzUT|(t-hf(wSjbv zGWR^Q{&JWgQimY-X%K^#wA^WFE40{g84g)B&kY4Yv3$ahhPQP4;^h#pfjiXG#EIgc?! 
zy1%0dC4oEF+v**Jy|7FR!o>^D*44#))G&!rhow$Znfw#i8cE3k-idJ7WBl9y*QW3M zbj8Ee($nHAN}ez}ahdFlgH}1OUVEH2GxDLiR#`is-5Py$R)TRcyy;&4bAi3~`!Q1z zBw_SH%D!FgYg!I?WJ)c!qvNSv*DV$Dx1i*U%zvlbrP){cUg$FoT8Z z!sGZCS;o?zZ+$fu#L_*f`+4<-b{}8weP@}aGpUjS*!X8PS{kvj1GjE7zGLTQ&3t&{ zYYf%mV@dHh1)`)tt*i=g&??38jTpGuLd*&Env3Wt>(umD>Dp07fqhmviZchQ`S2rK ze}wS9XQ_e5+1qi`CCWH0!<`NMTs-Hnc|S(r zcOk2~mu!kYTIo*2eN+RQA5CQa7)S%(G@fai$7lZFMpLjo&acc3n7G1j+EE&E#O`{E z5MihLf1!TD(gA@W;@K77M)=`}u}s`wI#al+xpyaSoG)GO!eVj8v~ z;rZbD&<$#Ek5c;2(1B|q2fd%{a5)e2-BenE&sE3AZf8ZZdheo@1b)xh<+i?z0IpCXYFQM2l{NR4)?KsXg00NP)GkJZ0&{h25*0t2EhME7MHmp{K$puunImq zLU;YqO?(-4R^(^hS!-4T=YITNC)NtBZ$juS3$epjU@!2F<=S-=?+v{=w0K~RpN;kz zrjk-L1#;d4PMRs>pmH}TN^u9717#ypR6O$-w!{EEssV$ zl5kU|b5+^3K-ci%k7vH9hX&x?sfnBWOZ?d9=2hi(tuHS1`&xI0?qs9K(*I=OHsw!= zL2UddX}~`OzQ-3PPu_5EB!hS;Tf_GUf_p#sZFUK^HY>i{TR$IiXs1U(H02ok{XvC7 z`b>l9(kM2wLbcx2Le5kIK2$m@+ud2$SQ>)XIc!6tnp(f+<=PpkG=>Z?;8|p!atw;_ zt!Pkxu+Gmk`HOFS&BN~ZscZTd|6kHLSAHc=&Ww3bK_b!nkMKWT7$;3MO1`jYWBV%J ztf0)a6GrUqbGsgQ_HPekxzQHOBS3>e38%F2aV~aWDyyL)#H}Gbc8>%tF>rsh(yQFg zScU(s$|>xtCjXZ!3J08u0*+;-NFfgRPWu_@#XOeykwm*2UEsIMD!aPJdEcAgwgwML znLiK}T6vD1a#hBep!>A2#REaJ{OD~oo3-NAv!@6wxj+2H^bbc*z9H&6uNj91HTZ)P zhgd#WRWVk2^uNVBIM{#im5LmYl7Fc*_dA`Uv^(x%sY-q8%6ZqrChWo`0`wG4 z0C)5M^4l4sn!F7aXv4~3P6%Xe*qOIM$vpf0PrOvZoYbb_`=1TN%FV5PI)|ubY2rF| zGw&}2VQIhi z1M4y-fmzNiwONI>Ei|s9D zYfBvIRV^O+D5v!xu#%9y6Lw071Nj-^|Ds65*B#%Ry!ReEfPL#X>y(eRfbT3TQ$QSA z8V5^IMvU08f%`*w*iXrmK!ac2VR!Q6d#t!$-roo|5Qzch`afR|?`ACz4|NW=uo~#p z<*T74{>J`B_{nGp7bG&_2Mr@OG}H%0!rM^ekZYB}bY6P3%0t#_3tP58U_wGuc_T+64 z^ULc%J>6?mKmmWX+r{A<%hCTFGWb*KK{2)uUVN*0lM{`zsH#*o%k5gsw=h*JsjS@m zO1Hk_ggz|OwlFM7%;;MHML$l>i2WYT?IkUjI6{-Tv#wipSVW{b1#3z1e~1b0?+4%q zuoCcmn_m3BMbexv1Tx;SCOATCn<_S1wWjre0u@XsI!c{YItAjuG2Z@@?EfV*l6t-s zTU@4Q`Zx9(l^OqAXa2+g)jjmXSAnU3 z$e7P&BX-eUo)&V^!}d2R+cZ7_c&`{x!JHWt{8TwSw1|LE7??so(Rwgf>h=Gthjj#3 zkV9%EUKxuyRpwoGGw_gE_7w=Et!b`+Wt=L^xrmkUNoL9sPsrB*qy7V0_fFy@8E+ZQy7?D71VkFDD}0R@(po*qwasR(19cPVFQk#VQuVT 
zGWIR{J^up{>aySmdTL$nxbo^1J_~c8esQ`2Oq}j@4-Vl>IMAE8Cl9+5`yx8$qwy?e zEPO>2kkLsrnqF@t(WYOMV4PoWU|kS!9se7<-eB!I0%iumXNY0#+!r5~q6UuJ_ZAff z!cFy#^Gf!{*LhEe(t>2EKPGHN4?#V?hr)V403cb#bq892Z1 z$>hyZ>N#sPcdhS_p-1osB`0n}Vku4DpyKhTOf(^Rk83DsHmH{G5D{n*w3Zb zK$-N?etk{`bNo`NtewZ$I<$t%h&Qi7+uo+m;Vw3P`D$PnwgDZCPVR4x5dGoi=l@MK ztu?pGY$b6d-&iG3t8UZwkf3vDjaHBX?<)B~anXbAOADI7KlF@h8axHpb2Yq4?#!3z z@C%1rqr2|4YYO4dqzu?r|3sOscfR7)TK24>vATg`(f=9j?*Hon%Un+^dL>{L7`Vn2 z*z+kdzg%|Er!O~=E3>7ewKe3ouq(S&_}z~=ELGLS{gtnbCkTy4U#o~U7h~ZG6lpfM zIb@wt6sCZkXZBZXAC$u!c876q*1RL6uR#R=2CLGx+A)pEtD(!B_ANp_UZXp%<-UfhB-p#mLE)~l?GjC5!+tFvR8aZVQx$0UEcS7jY|j%}hCI*wHx=-mS+D*>4*f!hB}F+@N8cK}bl4?Pc9jvcw0<8nijr{Qu z8;X9d6cK(OkHCItIi7eB_MTg3*-*eH@D{!jyN9g;lj^w&@v2HVjr&=ako(}I;wd)r z(#CEj)*qdlR^nZ1ho;lO;Ko?3)}NT^2LVi@w_CM0lRLN?|MUnoC#h-uSgjXzDPO(r+@8h_%E)8EdJGOGFGlHgr=PRB&^KJ)y{#rT8;V%hQS znbx10X z(jWG}XM_+)HM@nG;e)dettvdxV2UmG^y}3QLvvbqM8(AO5gXf%&Q^Q;m3SZo8Y&j$Ch@nb?~2mSyRrN&D?Q<4OrKtDI?exG*)o0-!rUvr&zHfOW|wJbV=d&KejDhw zEfX~FlW7-JGv_elu;loDh-CCECPPX*^1^sNfr7+-rD{pPKb*>kz};<>8I zlFl(YxwBx(lj*X*KOYnQwNz00=^yr)Eva^;blmuSA@vOt`^$hBj{^d^woFntm>ha_87Y+$x%`xiqx+Z zR2SoFod0psYrU%I{<7z07N%pO0UV&+U*gE&Tsj}3viZ~le5SIhxzB$OK zGoaF6;15lEQnthPF*G309qw0V0xx7~Vg)~gfLWJ~nYmgNn?9ESVS%3%BQQ~zb8T`x zC$5KxR}7UHlr@Uq*9Jct`Oc$s$YYmtaTNOz`y2c8!44^r6qEv!s6(z&iDK8g?*9N@ zd*uYUTEja(i3!rwoi{x4hb z1iq6k-tM~6(x_Oh*q|6v!E%z!-9L*{fG>ro`v&71u?pzBK%6D>-^Uyzwz1J_#U1sE zg^Hz4mcfw_eWfh?+qA(=F|HlakU_5o>JZ>U5 z!Zf@(L=%r7+a!Y^@?2emP#ecI7;lTDa#=ACDey4|@86NwsXvK-BPm#AP-s@{LW7b~ z9y4DtEjHJ#n%_AAPcX19tGn|j@KU;kH$2alqp`Q`)Kb<^wq35m=V~-f51~bG>rBc76zm~#sgPFPx^sNjgdp=7R59fpLGSBgoW|m;66BrUauG49L zpb8?nL30%dBjowPhB~Y8Gokq_JKMX;L(V`|gDMU;>t2Po&ByKUMA(~kbnYepfSe3- z$JfKjeqq*~iL1&EZP_RPsP*f~2*H;nT%)YZb1{p!?rySlV@L5a_&bNLhu86wqg{zL zZHiq+F*~~tR+MQ^xiv1GSR%d-AEQ;X?x-|^{Qk764uy;xANlp$7XydJ`1fFbysys) zYL_)aYBF#qugBL3Pr!nOjWUi@*opb9(c#*iRIxI$!?H9<=+7(f`NQm%u~ywto+jqNorVOGPNzvhS79YRSHnvSiOP7#u1T z4M|1zD9g-X?E8{JWf{`g8B9!#Wo%;`!+S=*-}5~G_k5;h&YW{^*Z11K_kHg3FZG_Q 
z7$`|d!l6*)680?od+-tnB&}r}#Zg73eCAL9QX{mQMih)*fR%qLsi>Q);4v>j>ld(R zo(3~Q#ghE$(ACVw4pZD5w7X_ze^{UkL+yfJW(LGH?HZA#RldJP68`}O=H9C7M0v@b zH!R&#n_xW=3kwmX%5G-?Cs;su?lIyru17_HlimWE3}F3|yj(T5YWqG`4n zqX=cX|L6kde0O~?cDklY6{T9M`fyf{PJeosg8|LZ{NSZxq#a)TOYo*mJYB&dDXqtG zEP(a+_EZetofaYcg;d}xI5s~@Dt7-n3f^EpJkA<;dv?Ru1=YJ6~&vAUe>Re%HchEpt_$Dw+3P3Ps z>vgI5KqBtUnvd4}xBn`FU`NwF%UJ>YkOWXZD{@B|ccP>I!7P;-uHA3(Fy&qz;-0D!in zjS!toC<*MQfK-iQjJ=!Upqq5Don?lhfP8f>1H|E`fj1rM8pXHSPlM%QLg?Km42Vn7 z)q+XIKfdFi?zh+}8m|hbT>!sE;8M##NetO$(1$#<(kV}Gf6%*rBqG|?pq1g9@Af+m z=yffF%v;`ijx#)Coet&Kq?CP-^rx7vW?B;=nN|YH{FlLc9o+QHv%Z4pZ^@hdaCiv6 zV@HwBXQxB2sxSi=#&;)FF?SBRitI-znI+$W(lcAkH-5v$yzQyy1dFvlBu_-;i+&_Z z3~>Mb z#8!}Q?v?dom979&rv?4LBO$E^&IT+hd{!w&PW-UQQbyG|NBc2AJS_P(Ak?&G# z6?$E1twYLw@vdYB-x)uadE{g@#-wP?_YQUMrA+irm-r2qe zmqZ0tNJLm~4a*STb&RenV{fGu6iu3HRdct8o|ZP`Y=1jUq+>|2*#I#2b~kvugGgrp3M2mcL7c-^XydYgRC#M721zVg&y;@_4 zaX|MqkQ|_cRXuHEegR36MA+n()75gmLw=EAT?rfy@Q{$5$-9Z5&v``PhHM+_`(ADW z-8g2PWRmUy`tM^S4QU$}x5ru=;%W`eyMn_(`bIHeHB)EMl&H{Cx?5J~|HBoOTwEb| zSzTe`1}r4^`Xb9KCJ?v~YA&dI7x(KN{NFl3<*QcB4bi3~s`M23(n%zYV3r5kt)W&G z6c;e!L$5i4*;s)*JQ@RyQs^n}p)>O*ArX}$7s0^@ya7)=muWdJ;|byv-RtZIjs=R( z9hPtIeRCfL^9Ffd^mE8+`A3L(;{%W?4Fo)bhG?e~o-Iu_ed4av+4j6|SOcz@zvpy^ zZ5vUg6U%j^AHWSz!@xsP8|-G5`+;<4nQ!@90iM70mrC5bm8;Q zA#e}+qszVUwi)UHpe>T~zeefy>GeY$Wlx2d{+_G^D$8xWBt0=^0fovV<%SIyl|w=Ej=JE&%MuCfke0RLhv;#T{4cC?9`R9Z+%T`t z+gHwk$!t=%=LELW+|=9c7hlF35SB znW;qYCFn1hSj;wx*fW*m!?rvq1M6HV&G$xpoUalD!O+tFp}en6Los}~Vha6?KEi4J zi=?sk;F(i6yaBW)&)>;OlI|N*GSZN2d@8#6ra}c?@8r)yA{2T7x|Z;Rt19W0u&E)F z&9JGUTGkeB$F%E=D^)5;%|G1NA@t9Ccd7T5yH=5DVhC3!jq1|beazmT`;6d6m!tgn zn5C;#L2yTOPrh2WA%iJ}36Pxtp;1uoKT(L2P_Ta zc=C>du_GXO$=`?tcgH{zP#4up#XXO0n zd{nicN;;8f~{H89}) z99VyuIvqtn*M(2SS&!P>fu9`I?D0Ya>E`3=`>2Do?9LY>rdPBuOJokEm8!qWBrXNk z3nt&)V3?rCY-m&A;{wg2uhR z122Ba6>Q-#;72(iOK;X_=e_<+76XCn6wtC;Qt78agT`Xgq420~x3DghUZ&zlX)~Zd z^D(LC9X)7+Lc4{quIbj{K7T80YS#g9_DTcN;3U|-dUy8!Wxp-rz+GTL%bcyz_D~Dd 
zsQg;6*AWZeq&$r{cOe(00j(aCSEl%WRnWv0SY3b`_HTOU(Lm^WfU@u)v2lx97X)&*@O4P1MG4TKMp%1hsk?p|#tVqklZj>9=6sNK11w#J$zHHicbqxD>zWA#>@@&mzeQIcO4Nz~506RvTguPJjQk z5RP_(HF@k1jDWz1a`jI8b3#pJ8j?fl|Ck7f{lY7NG~d4awH!H@Psd=`KO#4C(xcRO z=ec3;;FuTAn@POm2b+*m8LYTqX+yQRt*cd17tFr8AN8TzKH}aX6uPTT>{H}Z5gTuG zS3#Y04|b)ncxL8lzbG`U!N|#ry=1orL^bhyyUL6k;$AfXj!aLLPjY*v z1z5$<%|#QM4#m@$<&lJa@q}@P5$!=hU&Ry{gI(m|q7OsFn-L?L9VH36nUaN`XprgA zzoY`5meE>u9a#l|u;6AAN;yIti4Qadv9YhA9vEA22A)$io-hEKKDs?KZQM8eP3_uH zK!pfPlZgi72EM@vyoIfz<3&&|<5LI)3_}8CcB}7{{^)(z2{xig0M`k9j-NXf*CwFvDN^Y!MGr!5Pkkk67Xd|1_U=N` z#qlM9;3JdZX-I^mqd5hLRt)!iR7A+dNq`g>D`*Zq%s7fP$K{E)d6atIfq{4`C6W;$ z;}@f}vXcjj(u6Y79x~=wu4xDs;c!c(BE-CZ_Jr-AYp>tUfG!UX9(vK>FYd!ejX9O} zFB@~VpNJ&UTbTB_5g9j9bAQbMoc7iC@c2y+0RTviWZHW-ECQznZBvMM{}l~}FM+FG zBYvK_r-#4cjQo@}A&^;@i97H>=`PsA0RnhaM3~wK)qJ;==Td*eMD>3{N>G5YfhN)h z`yv4YRLige#PIdKX%d(V*(88B(l5yK7+LMp^+mXt#q?}n1{ffULk+3x^R_SNvupXm z)OA&{Bu^s?X*X(-pynZ^M~~G!9?%c*RGbx6 zciY@tOi_`%FBkOBhP3rDi^g&z8$yk)ATOlFOujQuV^ERxRfdfKpisgDPH$c+_ZTUM zPn8;UhtQ#pQC)pF;^#~G4$^;<`>mo^*rJHZ4GDhrZQmB`3pHGr7Kb9BGFCbEbMB#o zv$zUkE@GMwi)PR0Ok{v`4O~oJx3Osa5aJO~za;?Xa-8+n0au`Kui#(`71v)5><9Dc z?u%*bHs%?o8FvNb=ndLGhV%nXN`AXbf3-8BCFuIbd=`zPBz3Nzff59}{a0-0|X(?tdmIs35j4qS>pvs_g-p6&#i6u(-qzwTPq4Gkftlr>#h zTkyUJgwOQ={WP7VFW=0}l*hp&-04B#a`AoBFF*7^w}k6bY5or)An$ zrz-0%M8B-XgWfeL3Z@wA&Oi8-%m^9k8Te5_9MJ_Ic0%RO3S9#6{&NbqRkS$vXrnbu z|F2wSpfP5|%JtBkIApS^0h%3rt<5~WIPjoGW&CeXUG^mlo!hSsQeKAIULCNrA^ae5 z?J+kc;ef7o~H1Bv~o+OLbdwM45zJ0BRb?>z!KM|e}MG2GVt$~ zhx5qYhBw@UH5DM)^ON-QFmR6Dp801mD2wFAfhdczS?Ubw`iJg z(xK10&p`ZBIEa7n@OWm6gYva0c%dhT#fyDyDxF$uK+HeZ&Ic7P!t-Q8a(XRm_f>nJ;|M@NDUAPQYqwmXc{!krYa*xCs-vilK!WQ#d4Q#;@Sa zo%z{h!o>hr(&@Lp+LX@DvDpr!xwKyz8?@v9TSwFquD>6<(kqBqn8T1>t8`P|kzSD+ zacW4pA6+bf14_AWtI-~f!cE*f6_+dNs-`ugApzaS%P9OVh1PLhsf3S z+xG&(0!%62NGqf)6aXMKW%ubIMW+2kcfW{~NFg3fY}{>V($G1hX+N%RWP^GA1T|_7Lk22kmczAR z49+tzb&V1~Bj=DM7k7d`zrF(cHu`~)>M+%^3o{77o>Jc(M~X?m;0u-_nt7uq3~gnf zs6;MIrJ;c4*ipcmps?_E_-XlBF*B|D?^GZ?6-;Q9z`03z0JRm 
zrZg-AViuc#6ucYe6p%%F_I82|gitvyA9?F!yOt^0ep9XMy8s{a0m0#*o{5q=?|_o_ z)vc<~!tMK%Qj##ZR&NW(!pCd?F`LG;yJLlsPJAUA8%2wyU&4yLfCh8Bk)W)d4iJ%1 zh=WdG17xDq%sE`KE~COD{an<^b@aQiygKCa@cMN7$Oz9Y^a7@Ptg$4Vrh?YEQ_5ox zczHFfht_RhgWCaUFc6oKRzm@{8uh-2piK5+!D^ZCdexGUJCuCH4o^g7ND$=%shhLA zId{A5`RqXmpm^rL0$KOYLKR!KEpVU$O<`6Y=^VwoIU%6|pH=sLe#8O@y>AFcPJ?20 z+s{s8o{s$Eu6&Kv?tD7JJyyIzR(>Bk;bxRMR1K{Y$j1E83M?%tcYfyTY6_ibxV+l)4cYzB^c@ZI>ELT2R0s_r!V_(Ir48S98^w&GP-S5LQ5=t=Of+u0P}gxDH>47GS+SN0nQtm)j#tT52{cWx;@$Jk?ImW(1F1NV zYl%spt?PWT0*Vgt6xD{0&<&4kFJda|@ckdr6$P_zK>exbE72l*!_k1CA0!{E`pgjOejmF>)!Rc_QN#!%ZZMD!5Hcy`CwuyTTG-tveU!SCB2L;wv|-I!VOm&+iV8)T@y468BDJ@qF+ zY2|cyIk-L6rxp^wJuQy3z}e;L4Nie!&V*x1PnVUHo_h5~F_-N5hddgS&xQ!wtOh+x zxzTmxz@S2h_2ujNB`WY+*w^* zPSUBaP4D}G&j<58kjC*iU(a{cU32jJ;45c^VmGeZptXYnqveCgKA(rmZ&-~r8GU_J zDJ!I)gW(*u1Xrcy-l5ur!E!h8pXQX`j(2te%hb7`dBh{_TOFG$P?-+rpaqaGBaseE zj^B1Ghu?KqTgROwfLrw1G-eyU2tX1^cMy*;25x&EBL`bN_$O-eRu)OK&Z~lfeiRta zTfV2(ODorDxbk~VanYh?u7w!^ZN{@v;@Og3Z9S829FMml$Yxf556-M;4_5pkT?y2s zU^7H-JH*v{!&8&7R0Z01Qni(BQrT#kO-6<_AG2=0|GmzR&6I$i<=5O@vYpG{zy;7* z9T@|`7t=niTGKWrT|}Sz&w+MT%Q}2K_$1*KDH(d_2^b$%W?I?N%3--?s^nV)N&jGYfVPm+0#l$ zt8$ru{UXroYPBXLmfB3RMs^q|Yrz{ZZn1rmb=s72&K?dA0QRtfsFD9CbyLtN!-i%< z8ECH?aYHbsd1!37qV+OtZQG!q#(LmdOn>?~)1Qg^7p`=Bnln zbF59WR5;7O|S5SOSt1Ivj2udH7@rJ{|LjEow4O7>q5WKsvAc zJV?tqQ0r^AyGhf!rD-!O#+i3DFx$ja9rP@wWlbNwtCuS?@4S|+>l+)aw5U=BnuvCR zRsYKrHor_ilrpBze}@@C?stB)LDIpk^0iroyN~z`K{kr*#>6K#p$HAmK$+sTe`i<+ z@9_0~A1e$Q;UmL=yL=n-of$|xDm;jE+3Pb6WiD?g1k`QExqW7EU{k>?4-=vg8+GT$ znnGGzH#6F)Lwk7fmGIScr>%GG!FZGICz(op4WUJ3^sY{>?ia4zJF$-#%l)qA-dNWC zi5otohU}1h!3M4*-4&q(SCx$_6|1CMgkegX*6XVDjq?BklBHxSJi*nbbMtF^bh+oe zYr1!HjnT7ExglJ^!fF>-M~Q;xsxHWlmM}pFW3wuwsk`KM&8BK|^22~REUcud&`9aG zEE}Q6{TYLUhMk2*n7*GzK_yWg#QogbB~3jqR0Px7M8tcs$9Thpa&-ET~yRo_|)$UzTfcRO-B{{T?dtBsyT6StoMiP?x%xY%e4(Qyf`+U~=@2I>`DBOi}DyEU*m5{W?EH61h^G|95$O*ZFI zO6{1C=?J15O|Q;dNTHI&`mP1g7$Dy1(Ua#ElT33Oi4WW_nDU-PM(vHeRxN)dNwC># zRTv#w(L^W|P)#`s7cosfV#^f7)x?raLVCFesnK-2S+(4#D@0Pti|gE8KIsPusVg{0 
z0T47z7@FNag}94TQetJFw=wATP}?X3Cn?hQ?&c2lIQ#!lU4+-`u42qR3@?s;%0>0_ zl^h3`IumFs8i9V4%d|y-)%U%Uy$l-HOV_=I+i0Xo1u= z2O{sFed=n1mc4)sQN?m33F+-A&j+OBlfBWo5RVU^p;?|njXfP(9!^=mhZ6Q=X4lNhh?E2Zvf6yFHpB?NKINszVy179&oe!+xQis!IpV>V?*xaf<<+jmZdybKYZU>2W(AfWc{YTP4T=4&PzSnxLY|9-Y( zkBS5NnV}zcL;eW3;NbPwAkh@h?k4X*D<+jn+II!2RGaMCgk^_-k27x45J$l`6n6`w z@{6X|^G%68V2T2O?0kOv+}_Q`52agRL{#;v#AL7Bcenm^OxKzH>5{zV3Q`-G(P7@ff9{2&3qfJ36eR2< z2DgHs3llh}x3%3kN!t&5;M|tw+S7J8{vjWBe9U!etpk_FhbQCe^qi!B_48rNDVp94 z4&GY5o1wXRZJzb+6fzPILQl^`OR;0iqLxQqlcT3!x+_={^fczV>}^U#Q%&TeT>u&4 z;>7Wbm|ROcgHw_pP!h7G#mVQ7-){Q3f+*k_9CV(F(3P9-FxUF&ifI~FFQpVQ;Y&*M zR^t(eT{__wlv%UpD!>?ghDj3f_!p0fz1^&4j5&wLPj0vgWe@ELuF0vXG)87wPI;CT zhc|?Lo^Rc{P!FZ3=%bp7R4e;{78VCdeeYkAnyy0jdxHq)w&X6E=p?9$ef5knLcN8E z^$CYKa3=ja0G(UXHBJ{w0mEM929R9IM>Q9RmxqkLW?-=X0h4-h{)BR-(x-HAnL(fp zC7lGfwq1>$nRm6x4lyJ=Ho+0WwG&{Ln(Q?h3NhY(t9$9kO^xD2E>#YWjvJo;H2ORmw#NXDI8y%SJ#LeIy;P48H#NsN4`G$E{ zetJTWqRO)_jN62j`l_3ogK1ONj*k;(+wP(lBIo#qo5!=}xz;`WUEBW6%&?}!P#Q;3 z5oCJzS4%sQ0>sFN=Pz217i;LKmU!*h*_wq_^hNAw2TQevjO8wm>>Wkm>+7rYgO(@= ze64|*5K8JHSL42NB?z1s`biLo`IYD6wYD+Yp_~6?v5G z5(Wrgbw3ToD6lGP7FTln?5wx%BE;ke`W54Z9{wQ`;e5W*rbl)X5oeS48X|o?bvWE@ zZdv}J_IKlizy}j(1Xbd$N$#JaEQNtrP5Fua+g}4Tr6B4>mEa6vhwi6&z=Ai+&4#$! z3bYP-NS^zHkP2wA5Q%p*FyR{A2n}}7cZLQsK8#5;OwG1zO+p>9XVA_nU39lT9 z{$sARQaoWz^CHN7g_&VJY+|gCiQx3z*jqh?M+RooLdg8;ZEPQOCJYBt(sXH5zvCZ? 
zt1k?>ksIFQse(xy+)R>7-RpfINUa`$84H2a;n}_%9_bzOCk~M{0!~X{%7KvYt;%`Z z`CznFmCa*jkhz-9;Vtr*5#qsv|1l&_+f?ZZPS#x^%JJ;dI1pccXn9e-eS}yKSDWE+ z;@NWC%r8OEX4&+*@?vaSN$SvzvG*YlopN`bpm##d2-~D0#MKaBMErnErFTg=aX<+xvsj`hvb3lA& z35FoH2ICO!&j@PNk;mLHt53A>4-^e@)0p^tO(MIy2LybU!yhG`vh-2ccSH*{F*F}* zJYH_})4_gVrLi_ln1xuNdmP-}PAf}V(QZ&mUv45;we+L?ARhQJd*M;NAbvnRaOJgb z=__0vW|_LKm85+T(lWo3Y*w}Y$SwE!0v>xh8j6%V*^X7zR8p_TaVL&l2h$v{dDOc; zHq;)@Ra{+y!~M_LJWPH-a-Anep7q8AEULOW$7pH$sNAy|@X@U(U;o_m+ZTtg9<&CaL}6pw(1fO3E8_eslHmsj1X=x)qmPOgP+rRiwWf8E7Hx zI0ihV1W-4(6BHhq6{ma}+iNOiS*fz=x2O(XJTl7+ncTy`CZ=9-;r7DG&O?{2o=e?& zEn<22@#BJmL*nA6eGi{FrzabEc$w`ia9ut-?(uVads8QcV0?eL>}$8{=Vl=gTa( zY{es~#az45Bcm-hx;S@hhm-+La_0L=mY~p{UF*_IOHh%5yTABG)vka21O34HST)~4 z>)xou`OpfBgUU?jDtTt`aRu4LUPhWn*>guVvtK*HEgZdM@2WnR)r~hMFKY!h`PO+= zKVByio^~X{BzIJE_q6@iy=9MapK8W03ExS3!fH}tTSks2D1#sUJtrW*1k)8F)(9;c zHTeb+xlFyx2K+d=JJ)?ha(jL_oY}^azx;u|(8XOYOqiArddi;Z@X$JUk7zi102|J} zWR9^~bki^FdlNd~g+0%-ucq^{_2n@}kK30#Y)m2$$Alw=BLJ#EAUaRH zI7`zPyc8c6yKbi>k;DLq>{VG;uD*O0^u~ihJ$({;o|`%iw$1+_lVK?(#bp%k3WU5r zvGbcz@iKYaEVSj1isw<>bxUh4Pkd?(HhckO9%Ha`)IKw5l3#bkbNyB{NX~6Sm{8 z54F}E688&(hnd8=IX@NB7?mgOQRRo9U`h;fH->Ey1U~UuRP&`{jmcl^m^KW%>rWQA zZZSL^1W?c31^xxWNS54lcvfS={rAu7hK~ik3T`S+DHU>O_xJWPpo*GF^=eaUXGJhd zQ_aI#B)=lySo~@ArhefBYVhHvqYQS3hIV{6d^=7xDYzzVO76pozr#)KU^Zk`9$MSI zuI_VTiTj7x{Kcc8425j=af&hJo4qYjaAbQFT$O`o@rGBj>2G4$7v^121)jxyVCLIe z#B6vgR$IeGiJR@GX!Sk`gr6~N8x>yA!71RP&ka@~xZ*$8cKDX{O0|LyC~X?$S+tz4 zP%^8qxP@ImF~V6KYnmwTcFHN3hx>CdPPsVgDg304&ROKKpP7#j1e+x+p^~EDv)VbP zW@m8f3qXq)+ft=i8n2?{R3?^DiPJo&4(3i|{2(>PX^_#XG55X(T^8$1yty z>~}Q1snZ*S*S2SRi|)K)GPD4hke)V*^1&W#mwj;ZQ{}z#>U`kvMrh(@@a~ptQ7LV6GJ%I>9?!AUvD6m@=kNIQ&HP!`=mg`7;!hoC#C2E3Piofs7=(~c zF1nTRFX{bg^J&x%Xq7zUB|7g?etrIs1JZZi)KqoCIW%UonHD534HTp@_fkDkHKzZO zfZ3Vis%eRql+@4{55m_LNB>8<$3I6ZF?QTx)-=I*zaMGs4;fPH;nimPduzJpE!*de z@%=o%8N=D;6Q|Q_TW~GX5o&dNYHV0o^_=0Oc0-f<{_YMg4v8;ls_}#mQxXLLuLbvG zW4%eTEe$qQArj2O>0#lIrr&lh-h91?^Q8SOmH(4CO{ 
z6G%E2(HUWEl(Uz9!gFEJGao(uVwHg<`6o;u^k5Cd0(}-P91&JO8WYJQQD%M4oiRBl z*y|no87ntGTe?k|BoJ2_VSl&bIw!RHXv31{hBVWE4b6-7@(uQ#8}ihP7LF|B zEgjd&;m^4=NV_WkvP22eECxj0$=67UouwwfZ=ORQCwe>s4t(nWRl)u3^nB|I&)Vi5 zaFucLJ$|@&t$7;XDOYj`)mD8z&oJD~nGw%ad~KuE^x;|?I5ckF2O06P%8NR$dGSRFJ$ z$;+0Bh1QHa@0w|Q>MaDMjwB(uovK%P3G z2SwzG*ft}6-~RPhaeLl1TGQi2>IOq~0QHC0kppIWzR?od3WY}FN9E(c{Z5}s#~i|YYiokqLbn!d-H=Ii+nRH>&f+)MRqul%Hhi&%5nLPw{e3H*3|*qLfq2}Yn2c1QO1~j z@vUZn_X*;={aOIx%VRfz;}#^2F(!1v0Ho=d@6KY2LcydHE|i^H{QA8MKBftK;q2$u zV-A#7>jlH)9>E7ycAf?^dtKd04bg#ztav+URA@GYbvMyNiiK}HrEGcFD~yWRw~O5@ z@8Z7r7GI^t;~Aq(?o<1`>TfVsqnNDUx6bSIGxKwA?f{J2W@p+0T||9u!ra?rl_ETXj8=1in*uZ{c65n}V>0ZS~)qEv55nFTXWY9g^FAX7A{ zFP(DadtvEVY*b`GAK67xzQg`F4*&f-*Q6sCG*EJhBbFQD+nOG~u03n8xpR5)zJKG) zgIoTM)%VP*GXlYl##sFAt&-Qa5JxBN{Jr@AgW$88aS*&r{FdcpxzX?4dLE2EYkdg> zxCVS4yqi9lq+@71Gv3RmKR@EjloGKRm260+IYDCQMo-~B+Mk};^6MMd`!*40XWM>~xO!-aO(OBqKTIxgc@h+-Qs zCPcUF`N!C=O6$pbUb@h0!vf7se(@5JcrXta%f(YylqX__qn9?!a5+ynv6)ZzoH@a` zl{A%g~mV3LF zS4JbIq|^L2>aX$YhtZ>KIg{*<;%L|uNVObl)_FR9X>^!5A)l@26iy-2%oAM5SJ(S{ zZ!?_+2r!!sYzZJ_L)G&|o9&)cS+!b}3O3XP!(T9cfLuY)gULunnm6&Wkp1WN z=9^O~>gxJI{MN+zy}!{jJ}D{^IJN%W1IsD(WsVMV@N;IpEu82?yNx9*CVZ( znbtrA;zl$gjun#_^{FK|+Piz5@q&e+0uOIE?R}eyEcR*1+l3(LvaHE`%eMYvo~mr5 zy#IRz4DCXd+7Zy&%1Xw3`5kROuMIdI+V1JdA&1`h!Lts=yTCGZo@lMs;H|i&yd-~t z^=-_jA%ftur+nHSuito=>a@)ASBTX)d;xf4k8$#!a3oeR&j0*&pp`(CmtlN`NtDA?A3XbE0Vm~r{S*#hn6N?33n!4mOgd#1J zjLVs-63)nO@m9USls?dhTK>M;y7tz)`=s&d`XhphJF4ixXXD2r&Pv@f3=ceGY=aXEEE&XmI^$#6ZYZ!SXNX5Bag91&rgBt*WTYo+hoU_ z=&?SIvwHsA_E6=pE7`?=N|-a(2tEFpd6Ta{^UBpr58BVpdu}czobWXNbo^<@Al!b@ zgs^$`(cvp_=ncj2!MP#Aku1Iz-~{s)_dQcCJw>_JtRY1Kor$bUrRkTTy;vk3Kbr1wEhEdle7`?*DaqvgR(;hSZ#Q)ecrE%trU zH!k`aE;kS(twVOGR^nf`Bz{Tfkd)*5&4*8(OV>1uEPCRKjR5}1Ao)$TBjvyCkE5SIedxsGnV-X`3T}gWR<_Ws05X2a^z_ zR&C7FYMUov$%6ks1=V*{SkD4fdyseuZ zfHrtfdpFakfCqQy5hy~8$FL5KJsW6G_kVGX>Hy7cd^r=z(XV=N+qNQk<(LnLRZ?g+ ztC%qFlj1amUcTRV3mke3P3C7bCKm)3EJgH}PA>_Gkcfb?exPEU@@)E##%Dx3lZ)`u z*!gk9^a+qPrSu8*AecurRF{Ui#iK($J@!;}%wu%PZuKtWjbyD83(I`-Bx37?Cj9t} 
z78odhSrMMEGd)_-vwU-F@x!dN>%?T~v9OqfA>?<_2j(SvAZB`gV@Qi+pN1ZOO>uEe zHtvKYI9tGy!{Tro8zrFb8RzRzp68y8=EZ97HqTvX*t*JGWp=f#BU#&BRNH80)qicg zA^P5ntOL6`mCZ3t-&wn;J;}Ocd~UDCL)JPyjhJiq$=c+pg|B7fp69t>LtRWMj|YBE zTwx4K@ZYQ6H=2Je4h+do*X%KgCeO#^!m5N$p8;?=?*86SA6=EwnJZtw=$7#!Ju8() zw7NT67Mq%$70;=${w^1bW1w=4xjcJdxPbfIr%LVxrph0-stAl3@0ADfJw8zc%+dT+opQTs%;VQQG{dF#q zwFp4Y`1n9_Qr|&|OTwHP#n8;GC|-ki{Jjm0xtD^?p^-Ebbb#jZZ|0qIw;j|7DNKfq z5lD-}n=gECdmopl>sBFR$~Ebdt_GRmyKz`|tB*Xxe9>h6-S*wU);Akvn4?qVg~V+e zzY{}AOL&i~Va437j!!#aCsR$Dc$J-1fxEGiPo6LZisg2Xllh!nv#%ZBSJ-sbn&=vp zUtAT3Pf08#MVak!v3m|mFk5takm8U=L_ne-l6i_r@6HGK&F-(okKO77=`q5 zroUK*d1T)^^w*fiC%v61#VaIS7>*GT4ppwb441fMP;mO^gih4V4{)s%aQW~=iSAI! z3r5IlnFg!J3rqLZC#-B9F}5}?$5?yDSZct{rQ3&Gs~Yb0@$k`vvP$uh;_r5T=={5w zz)RTq2%yI-6QjF6&WwJ%ZskpU;>+7O%A4Da%{`6JT+llnzL(t=>}FMP-CII4@*e4&`j&bSwhg6&PZ zF1c4DOiSyhu$>|uSM1!<)t>C0No|r}Cu11WEs~!otlkjWl}FSBpv*i)N5qCx9}e6v zxfXRNX{7!9wiW-3=kAdj)|!v?3Dyzrky`KForL@)XFjqmB<`GANP9r*fdbdn|BU`j zGOj;6VjLp=W6%)J&s{yln9|%TtE_MDa9}-i{Z?hLxzv+VvUE4>LpOrpB{ftHZqq3= zr|I?Snd}#?>(#vf>%RRreB(|tWPKor9v4%Db{=Xim5-q)uU~OGvUjmpy2DpBi9=ZS z_*|xDljz>}RwPi~fB9-oKpE(_v-H)~;w6PWJl^%|3fA@Rs5rB45@N&Da7)DPr>lzc zJ?_~lpLbh`�qiqUe4Ze2e|zY<$OK4ct6{da|#$t;ZDG zia$~xQ?2YGg4C0d>j(hiwrx*7S4_IC*29! zyH&%sVG_r-;V&pPCLAvxFH!c=D(T8oo}>BzNwbGeB+)qqbM#ZBGyJ4Q?$zdKMrzK1 z;fW^oq;j$^or$IALZsYg@NKv(nTJ9(IW6+;3u*f;ofm(H3vsRi8NGcqNQw$Pwz%c@ zl*2Mf0xri}qt!Yg&~jJBQ}Qt*yQFpUY(=?9;PG$H^+zoDU(4m>}DlUH8d{7a644Byre zxzfI*yZ+G);W4JbOTaEO^QQcvwu9$}H^WN^)H)yRU}=XEk43!{%dIa_@N4yK8xZu3 z^p?(3+}r+1Q=QU`L@$^0(aUpOVHvzr>#*z6@e_a0EMu{>@eiIH5PR|A4Y8v_P50Ba zPP9=b5RyTCK0F=CcD=;GD8>E6Wjo5n#HSTPFMQva)`D`Gs{d zsb{1?B3Zsw2ilwL zZl3Wa_nRKpGKpf_coxXB$dHmyqe50QG57I3wuGF*dL1SPI+kduYr{M8xZ7rifI%1W zrh-THa{S!DrLe

T|d9M92X09JMiVsv|9tcJUzpksa<7k2Mu&TOj-QKh&6)o8}&& zu-kCnnLp;0>V@AT)rZ*GhHm4JhRBF$GtX$JxOwhUZ!#UAQ5bdKn~%L2C4T;Lx|nO2 zW6lJCkm2K5{G)DDqjy9vGWkZwosU7E%4`|eC!FlE-?DA&z#3BO1uEGeaw6}^T2mU!p+>hJOtUuW13PnakuaD4n9`*65wdfB3k3w3`zI8vpo>)IW} z8+bR^UUC-yART^yOm03;43HhB{SHg>~ZQ2c&yMlIy zOOJ0VQ}!uQtK{@lWWy0r8%R-gN(o&)ocB)qwjRqLm~nUx8lv9^rUVTcW_csWgfFZR zZ$%6B_gY;!A^X@ez#e!|E^-0l8ODJx==_wrd8v)-q9=fx&E8neJe#2N%Dl$KbMl1A zj{3q-*~w7B7t@RR&X`l9iw8AR`9=#E0561t+Cw?kiPg#Upuw(>wUllQ`QZ1 z%om%^aG{$aSp5@T-H$=nz&cW2^ymfo^rwuot}IiT2iv|}Q1G3~9I&|82zOp1ZbWL9 z7$_5!KLg1J+t@bdwbyx{y>n&voXLqA*<@@#COoz(IXwW2eMk+YR*CyqM+K%e&s=(R zWA503Tb0=PswzLU5B8WznuK$sQXT~)n<8m;OES1$2g&Z6vT@rF%~bW9&yNmqPe6G2)Z!6>9+U~EN*5paCD3{hLlflSd*a0f|F$|V%+{@IC6WKek8zt(v(9-&wTz-*NoIiReQ;60$o9*t;XJrw} zM9ULwn=lOc?!vz3y1|x$x@U>N9%#*B5tg2P=gnRDl)u zb%X2)CYvVl%4p%nFUlp)ho5sg;=&@RWB!2GE?-m{Tuh(_zvnp^6^y4XrEk zNv$ZHwhKUGrtoewr*3>iUdxlIA=jC=ZSI6t$WB>YHBB)klu##5lWP;guSq*%Dl-zc z8P7qlz4I;Oih&KUFXj*dS7&LG)$Xl~n$s#-qoR>&anK6~cD2bsNZE=%p#P)tx>Jpg zHjCyGHdMw%4UZCXwX6S+X4&gb4S`^OHqD7Ump-W_dCbSOMm|Q|DDc*T&-_-oUULa5 zxI$r@N^;{3O@E~-W>{Fgj0-wB22v^EIk7dase=8HG;Bg5%u;N$r;EfFz$5&2-VemO zJ`n9sjbNj3^?wR^%;C#6JV3CCf6m#~Y)r>fd{J0uXgAB?%t`qP7CEYY{6o7ljtcYp zn%I@gR4J&yT`s}wUHtPjG%{^3N}t<9Vls4G z5)H@4g1rmVpHVk&N1OG-1JGEU3Ha)MAXOM}c`6_GaH z!h-_sPPG;e4S5T~X5tDI$k?`PlwCRT1@W$EtkYEZ;vDTlK#o4gSFLfZFW@OgtCZtK z#q`5VNTy-sW|Z_E%d=zjf_3*3X>`!BC}nfRM$pEK029~}t(Kf)(-419V z)E{jXA@E_}G6>5A6KI=4grP>Ct!owAnDRj}d85y)BGv*0n43y{zxQ1{4eV&2HOldU z*fPw+7|e>2F}EdaX{(Iy9{2BL*WhUOgwMo( z-yIi~-CyAK5~Nyl+EqSOdEemFi!v7kwNSvP6b@{{(wyEIc9#vUyRaX{KnfNMW+R%6 z$_IPPyjFN4#^V?888NQOev4m%iMr})&4YAC#kj1bx%yg)C*=a=p~H-NeJ9<<+uo30 z9w)^ueXh2?G);B`UZ>6mua{0^7OMmQxO7WI!c&z)hTM*=6?L0G!VQd1U+~BlONWoK z1X1~5`%sUxfYVT0A*9#}(e{oK>i$;@X{o+tHtus^4U5i`N71-dsz3`knx}b%lrLM} z@u{s)^hla9hPHdN00(WeWEGZV5u|G`qwMzRwMl6hAy-P{qgD_s$v0%h^1Z*+izkb; zhRf@%-1Aw`%CsmfSUL0`E7G!v4FjJcAEX{8&VSF`ZiSU==X_s2_~%!R7szpKy=a!k z#)g-Ix6hj`v+eK7S^4Uyp70Q}b3e9rAR&dm 
zaB5&kmgw?Ue^&2Sbuy=wjG`FPi?(2Rq7-B4dMwBGcrpP0SUNBiUQ}dV*sq*uR7NnH z`^Xcnk*NCuA5>pNe8C0LejWbdboQA)I|OnZmg#oEw7SFXS-beq*69?5z<(55(|-dbQLcZ!Nc^0Q4rPN}Fbzt}iu_&--A!HBL^a+0d0Lo8ji%OEhwNDlY`I z>0W|{*nD@jz};e#yjqfR=+dhcy;2m@L^3mmb?Jn}=*AU3ync7np;No}zv^)lwL%>`MSO6wpgEx9d%`DdgMK=$1C#QL zoj?kbnzci?o`U%2|MAU6G`tR=x)+cQ-E+w1n* zws8+*Q*UZ|6dpEiX^eiJ>Y&Lv^@(G!)=XQGoc-mk)3O9M?aY(6FN`pMe1}bX+&yw- z0r*D?N_3D|jl{u-lkhnL3>WIOW64K_y(Sbx>ip|Mh0ndEbl~HVu+xgporVxq8}m{V zq*gV(+Z~C{pMn><3-DWSGk*MaH~eHGWRjsz(f%_k874V=qE!c z490(sLp<#r{3z{7Gw`(kcQN9-qoPJ(R{lG3khUC{6~z?R9%GHsp3y>w_P#usZ~yU8 zJ*_(NEQ|&qb>|$EfG>)qW6hiHv_4-HEE==L3 z+)KW`GM}=7bNqpYW&@DY9g8m`k}nOH=Ce?l&$@#q(Lo-_HirW?$pvQD<VkgcVM){|MMzE~dKaBvYNRxk~Kkz_?OW z2}31z!r#ALzm87+w(-NgUOSrfhkW$DX8!sO3R&oC(?;g=@Vi6itm(ly9#m(s6AgCV zKOj>I`NCgp1%V|Rb-L;12f18wP$S-7pnos9DoRPDzOgW=ldJDqth0XJ9jx@$m0ebn z!|r?0-3)Kzp_9|Di#Oa7*!q*6&&a+AFR*j zQzYQ{;RF9!7U8wLa$%v>Q(x{Lzec|6&}^(*lp65co0u_eazF^9+jRxO>@MQ%+q4*H}6$2 z5dHf(LJ;T%e)J11!fbeXGb*-UGF6IPOFb=0rR-(}o@p%fHfM7ET2YgwOXHl~d9y0k zfZV7HV*Y+z>bF1ZxxBj-u)1JUy$*q^tgvr@m4=ejh)U(BZ^ol=GCFOoPvdfhPX9lT z#z>LiYMu~f=HYej+udZ5-Hy0(|B;Qzh#S0W>yg{?=wEpACwz4guDnF39M{tjuzvoJ z0_$k7g5AX-)8iEry1sXl&xhhtUA7@X)Wz^+_s?v@qVZ(C_kh8;8Ac=}#NqoW`i0eX zYxQs?^vdVs*^F)b|M;lUyfs8}qnJvBWsP}FH^5*+x~MFLSM?%W`}QC-!7INu z9As=LXqtyjJXFcx7{_MS_+<+k&cw~AL$@^biRfJVDt9>LBPkWW;d-nOxPD4{^W=+&}x;i6RScZ^C@|K{HZV3Icqnu^i%^J%=Fy6 zJ$C~RowHSL=i}u8QxirL22u@+UA;!E(N?$ak42R?@H>Y+ymHVOM_r$A31J_iqjOS4 zuU!q3eBgX5Ed(z!3z+gk7^apC#%_f8L!!Zz8b#c07mxq0n^`v86}F%zX&TK`!IdI6 zRge#URuv5TR^`9jy2BkIjpaCt)OMwqyXjLJ2IX=iVxAl$;C74(lubnCkC&5{mImzG zrT{xpSnw|S6AkbDZz6u=Z4Z^FY|3<$`EOC!U${U7&k>!XrjUnz3khata(Xw^Fne#< zd1()e%lY_C&y9;6yVy(=pGJ-|g{3Qk3i4!x)>;fKvhwaNnxuy6b1+--;8`Y-C(0~5 zg~$FG^JH9c*YGKego3_bwSjB8G5ty5TR%;s>o7MdAK_|$)ZOj3mD}lK>DQ#?HcFu4 zcND+q)4m`6>aK~J?D!LWEW{*kLH>kjm&D4#FrSVeqpL9Z!~CjinxD{X#p@?m74;D6 z#A4*oG9TY#Qu!?bmmygNQ-<;zF%J{a@*FrMcnBAYtE_Srb z%DM?o1!Zu9u|4H(n7Qi=zQM*nl3;&Sh9_`OejU@=yi~+~veA{ioQqWIErAy$2l28c 
z7N9z4r;xTrr;-c+OtIi|yOXAIeFGuCVy@>;#9%JAjia4vsK;mx=LrJbifWgb@6GqQ zL~%RJcxdt15;j3FdRI{z!_Y^EYLSg2Ma0%o?uE$TKeuIa*gEPAG=|CH0ZnM5GyvyH zWk(fu$MQXM;HJ?Pr&{_^iXl|!yA%cOrx~WO4O=Ph9@$3aAwpuQl^Nui z%mwJ*PhC5F_*zXr!#n7wP$&u}_^u2~z&&wRYSKEnyz~h1_Gu~>FRf#cP~Wm!_cZRD z_F85%v4&TQ+CK+r#6^4LdHPn#cAcW|{6)JpvUGv+hth`cwsn`lvZ`_|l9;7VdVmXg z>MW@JUOHtW8!3}JYQ6$FiG=fF@n-J!1+TzdU$=#EmswQRsggudO`IvS*Kb_L2jhu!XniN8 zWznl+>a~Qk@+m%ivk2#e6B{NFM zgC*WiLxn^W?C4&EW$5-7YeKcifR2;~c}D+U_L2a~xHsIpZE3+cAmgcf$G5v9`S}T7BR>Y2U7&0vgp&7x}FS5v*bYs)UX(SB|CppBg9f)J0zSxzc zVFTJQ>d`esub=-m+W%!~ofI)*UdowR3?;DhVA%3yTUIkeL*gXbfzLG1?B~F@K%yN7 zdV^doB>mEZSfbBgEnvaa6+(*8e%0&{E8@blw6iJ5m8GuoY|pmLB1`j=81j%gri%w7 z4Hcg~BTs4^)|BDh=*jtcoci+%=b)Damt71aphcQL%CKJ%a>NzBOP?mr?%)_EInv2e zyP|xcOF~=1LT&my)>FjNV149t~T7o*D?9VRdrcl@|7X{8?GJ0TrnU+qd|R zj&I#5+j%rpe{P;qZMOoM(dPS_sQ8ypbHEVB@4l~@heev$c`y~#PWJ0Bi^+>w+93AC z31P|C@bmQ=hYFrT%XN!ca-{}@gz}E!%;$fPx*OU-!7bWM+pU+zO>8rHTNpH%&2Lr^ zqDCZ^R8dp@YjNM?k{t+p(E|l8RsSJJ+&0@E|2p)pI8JjX^rO2n(Aw3~czQ)p$a7Va z1~bES%_$EDL|6%|=4ByibM2s1ZuN*eovGjyGAod0Ao1s(8zRTBy5qZa5`ZLL`A&ok zO5<;y{)I<&zDy7en>{Rzln5JLDR%X<_$?kTF<`yUmQX!Q!om>4Q{t5Djy3#jZs(!X zY^?`eCDO#{B%t2hdLY10JTM&$p{o?xZPd=oA-}*pFk{ePc*7en@5rJU<2RV!|14I~ zyAXp0@TId+*BtnU9o-729A-T^niMqQ+aNFN*4jO>$9rS82yI@OEMkC4T%D`Mgm|k* z{_r=xSgzC4%oc#)oOkOx3PRxppwBti)lOFKCWi2esF~jWlE5Sa3w@KJCUxG zx*GG3qV6aTImN0x@ijP)M!-hZUb|A8MX0TVqn?@^+Z@@>{v(qfOu$P~=W|{Zjf5jh znj1Pvu>&`cZ@a%2Cy9u~^9vZ!oD_~Db*6&S&x6hcKj$ZLF5m?Ta1tQ<&e`QMS>v%p zE0rSgYSzINp0`^edE^ic)bJ1E`8+bK6s(&0$QOpbX_{J)TwUdp@mxNX%-`WH0+5iq z%l3(CH}919SCqdu?(*H@j>-WUJ6qt$iYjHf>qP_k8CT1Fk8}y(x0X;7$BB7+f8x0LTEhR**a`e$-E#EiXOY&zwECr59(YdoB ziCCJNC${eSI<Pbt+QGld*7Ngd{VXC2?itR36Y90{nSFjY=<=!8suy*ZPtqE8gfjkWiwct+==lA|ZP zU3RkNo|$^P^Yd0KkL#DI>u$yIq{zTa?Q=HV`nGfSoP84ud_l7A6Mad)9?-%8V$Xu;?y9gNTrY)7&iZxZ8T?hC*RkfOt2}&oB!!4{TQbc ztZ$t4IrYV=g=+a%4>p%`Za|*ETLM@U9jgekscT}iRZE6(TQUBW(<;+LD{4GC)wlZX zNP8-S9CC)=p zn$n*2_R7622-4a_;|lxJ1@v+???C^a*uaea0!#g}VSnBktokmcX{%lY@6S-2hpuMB 
zGSr@rYM`cbv>gqDBvwy-x5g{~I3383$1c+JZjYXiw`zWbKP+P+B=6q9dJ~~j!-h~d zsFscV9s{2F(*E$Q1R{ZrXj$>?CqR)Dx9jySd-5D!V!_Osd%Qqs&$(Vfc{bKUq1c|{YUp(@JklQ762G_DDvyyms$^^6L-m#viC(W@j`$h zmYYU_l+w@OH}G@e0T2pk{v~kp9@z?X#DLFJrM9xwNu=IFE`{p~qyNuL5Ftqd3b#oU z0;oPVZ#!gF?5|DhkR(pIO|I3vp6-1X^~`k_^%U$O*Vu{d8V<7yyKga1;hO8XsivKb zPV=S3m3MV1z-5#gfAoP1b7{25ABsXubOq1n?gR%Wkh5aD);0xC5=*hd$9NQk1$kU= z`PF6el|XG)&SFQNb`Q=fW2fif(lk!t}Ywmb3g=FJ~fjr@!uv#hTV=IGJh!9Ys# zi@vRRFmH28=*~r7`?1D@3$^O=jbhC&R>$VWrCa`8x*13G;&coV+ zY=S`ms#Yr{L@8PMZmMe6dR8oun!v+=`ya}^Q?-`OQP51ze0`4AHm z4>D^@I6WxssEG#uZ2VRhIBUJMXJ_Kk@M5A%ocF;qtB&`~!=UmI@ijk|uKdCSTms@j ziOIZG#tme#HG>qzxV7b+?>(+%fu&96pN&E4B@X_T>aOMmRPSKiCi=ZnzL}|BPzpG! zr4ogh1(WV618G^kV5;#(Fk$1UJ@)RAN7{5OF)ErdUS6|yrb$Ia#=s7y#o~hSeg_e} zFe|>(8B#pZxA*TmHxU!k*8!xcaT-p7MMriIyKLudt)qs;TRTLKu0mvyXwoh7`?|&! zhLO64!H>LfX+WV>bhFNA)v~u|ZG3{*ggoRTps5Xc-t*sp2e0S}CGW`LQ7cP%+GC(Z z^eoy?8(x?g9Cf?%}E-9hIluQ7yi@J{P~U&Db(w|U!xm^0}G3b7AoetMjpZ-W^yTcz_?I4 zZVI<)vkQfG4*Z1q@7x=F*A%s`!bxdzTB&^sFhlrQ;pg6eKw01wvO})Z{TOdMX3z{C z1HQUZ1zy3CA654VF!t}@qL~)FuuS>fbVGpzpTqoL)cSoZt5OYeI82@F>VqqJ7T)7BMOW2yS>QD+FRIUT?aI;$tg{< z*@f%y$r{$so2IseKjjCj)lh!AZ1N zS%=0^q_3&{$vN%x;gYhto3D9!%1OBw* z`6O*-PTQiJti~EsSu&T>=UJ)Wf%}mV#@n1_AI;Jlk-@p!p{J1caEN!&UlPiz{qoSU zGu}Vk8Iec4oAI-9pcR4g*^w`k6v4GZL7sG@Op}yvCGvCcC$c2icyC#Q<%L^WC5d@c zfx(5|29jlN>-}eZyjR@m>m*jqrBZ;79YJIeJ8|=zNYRudF5iKSc$%8HK0M9+EQC4_)^v5Jz(0)F}J*eI!7=blv|L z>&+A=c4i$_!mW6 z7BU~3leFQT04CDnXxtI7vYQL!JJr4gQNmn*yO#LmS7dJ4 z&|O|P{iMc(OIz42>l=|ei9&FrMYjtPQ!4kOu^C!&fJ+>(Dug`1eRxaCeSH1B%{rRS zd|x%Dtv2Z|KW-AfRWt9iX4vzna1wAvt7IbfGJO<{^8-o~b7lk@+n+BxGIvZaazhG=%o_Y zB!0p)5FSJ*66JxGI=ZZ;`Ef3I&&QqH_VR-Oynzb4*b9=!KS2Wxe(Y>C){ox`^g`sZ zjfzzAYEU%$e^rX=3fFmyOWu92o4U|Nj;N^AEy&_5eoIC`|;;4MvmP2u97gsovNCKc;b= zYPSpYS!Fjj_JzrpNI%M0I@da4Fk4Y1j@R)!Zv0JUNcu?$)Yfg6gUYEw+EP9E>pT6j zZC(w?U2$j)EVZZzc3%~IONJrY(VO5GgCif5Spk2^vhDVwc~1^zcX4F63PJiZBNR2FzpM_bl!Z_o0Sqq*Ro^(m_F|c5vwv&Z3X%Ohde~YXf71(8+3i@>9j9oo2b}Z*& z|Eh9H=kg5s#=moQ+6rn7Y4~S!)*z*Q&n5OrQ=u_$7l8Vd`8XO3iKPMwzkW*8%St_m 
z(lW8VXc@W%m1d2%azuk!)p0eN)P-Kj)3ehlQxElN3FS66>ncd zbCaTs$zetTGCz9+oihnq&2#h;nb6A)4Nxhlg5Xa4cn-LFJO<|Q4tc1EUOsfXQ#FdL zPN07qUBm9c#=23MIez)+dk=WP!LhHWQc)`_gd(i zM1noJSbVqx{oLx#XsCupV-bYK9qj^%0|B|_bD)K$pX=I4+(SE%Kd_-~JO9VUktfW%>udL{er z&b9jRuTY~@DIf~z4fFPXXt;?yE$XUZV77A)KV7m|%T}*9^{@6t^jN9aH}N$I&xD)s z=FE89@hm}P>`4G%G=sA*WGm3M`@1Rt`O46m{ZbjRV{{WB)DPUSs!6h5c~Qfkt`?EW zLiipA8qeBZ_=a|8;t`cMGQ8~|H;lFQ<(Dx67Tao3z5K$9?tdY8hEzcFGG-8*y4S@$ z?=^6M-6^Kk#aov!l~0V~YJ$&fTFU_M*wWO1_de zDeK-%owRZe`EGrez+UA;&KIcg3&uWpS;&EEQb!#8=#}ppV4a-+j#5&<)_VVW_PaYl zvu{23?GWI&UH7{t<}f&h#@1oSQZqIBpvf5!*hZCB+8!mCdc2&EXInM3pH|exoYXnOmp%_we$U^yAr(fb6U_D3CRFgyY4Z`K>?(w=FuGI4qE}v|?zJbV%)d!zt`AGj1Xnk0YU7KY!bH!*Dy9yo-?K%%E?8BK+LfLMux89J&Y2s+zA8ER-f}tA+N{ag@6Cf!xNw;l zK7^%o{4%&-r0L_Hn&PH;`U_pKpFEXW$Rl;6@nr9>5I)00+uokdVx#V>&G+cq{C?ey zohm}&EMQ`2UtIt6F2>4lzFXBT$R{QBMD!i>oXrSxT?jwr z0ogT6Lhy_6ao4nZVB#SA(bAq9O$6Hi6f76#<0FYkP=ptjd_K}@FOxhdRQpPAXtZ<^ z(J)y8b}BwNYfBC5mYT3vI7QX*T#6bUo3a;2_e_-_X&2o%QB1DW( zE5lG}$bH*e0tXkB`p;{WTJYU1A5}j>z-LUt)#&68P575pjP(;(L|3Ncf^W{m%A;M! 
zEcq#orShL0ZftmkE=W7tc{yBK=5O|#Ekv!8ZHUosTyX@Feq3_&aeYod-)Uus??9k( zMG7D_;T~E+y6H%dCZxpK_Jt#Sljr z;3=@Zg^runc-!EL#E7E*-E4_=*A9yC7KWx3eIf}>^idcfpy4$Q^yvSxoy(`wX~aaw zE}(*9g?jVzYx)U7e-qsVHerOb&E?ZNT!uO-hI~psHh%!CfwZs;XvH0Oz7fUxW44=c z%)hVc--bWAxUX10!M)2bm%5xB2&i2&RPi@yQ}C%s_=KAOmcDJ|zNutL^iAzk`ag-$ z{n}m0)<_PT(QsKRdOsG}v(ZolR-1P*;t9-6(ni@Gr|@E2-H`oH6H%=+!$qsF?#%Th z^SepIZOIl<$f5kdZ)qlC4jd_ef9$-%0NcS>+4O2!s`SY4ItfDge8vjPw*6*4Kd+IC z-P8@r^l*_k0SP2L(|X@pev}upwx6-7_5F*F>|O;o`jCzaMDRPQ{@TN(vWHG>jsnvB4GRQap7Fu@aey4sn9x0va!Bl$jkYsCDw;%vD z>Q-t#tJvWR?WEH#xo`($s)DuJieMINh;e{F78Nz&5%mgsBfni=gJ?4|%}R;seNvmm zZ}~)4b>d1`YL+M=uMY`cXJqa>$h#Qw z!d@i%M2R;}t#nF8?n^^q$M`RkvKIy;L8YGNO>HlRDPDt?OYGb9z7}0BE-ZOIw#;Bo zc|NXP0GKM;-}xt}Fb6Z|;QWlnjkW)sh<};Xto( z;GSLd7UPHdX$y`I?ja$Ra)Bv<2!iQCxfeo;Yh<8&Aa_zc-ojl&{7QyxL#_9u(wp5k zS?r`8_ubG7D#b7I%e0c1m0bI+pOR-jd1(xP77{&Z635~$P11<`n_U!iRuU3!(3!AV zU%2s;BmT*;A&sIka`uv|D*OF_vlhX{cP#WGoxF~NI@r1B2$`H`tk;uUxDB~+p?#g3 zl)QUT+>JD>$@~76lwM=jsf@zX=4%0ZR1zCj9vl9n>BBaDEIG#A!@D;Lz|x*pyySed z6<*>fI|CkYUCBxh(^4H%c=o*7-GFs;M~iLCq`r#{is||@X1VS2j^>>zdKneigg;~x zTx(oY+4JXbJMzPNz3j}1$1FsLsaa7c+D}nN zquD3mQ+N51ASywYX8thEW?1Xpm~xF&SlUt&y!2sVW^FUW1?DOwG+mz0pxK3K1mtlx zCKzRHbdy`| zU0eu2mSk5DAkf z%umHSCq9L!1kOSL;I}34HrM#@IW6Ew*0AZ%a}ZPlfOs}`BA!Bi@{B;OFmW>uZGf&l zg|>PQD8}u%cm=Y*}=<8Sgv#yAbD(?pk|KsNnb=)*_Q=raM4aon7{vP9hpLQ z8l(}FA5a%_bjkf8n0=p9I`y^i2>TOR#df~sc&V>rq0O^|#3=Kf^a=#U%Tpm8>y`TU zlz&5B5E`k?x^6iC;TP=*E|#5!htjHb5J5=V46nDelQxIl( zE&)Um^=B8zX1&9k7fX6V$Jp0NDUr_$5=Kc-N_^h)OON{Z^C<>*y}1N0W$EA&8Hp-p zW8u*ld!?2~ygVs0^}QMtN6Ct`0MsDnEy$f{ZybBsyO8H>AGd!6k+Lyu+&!s?#R92F zjAZc%;>fum#w|lHI5oQD7id;$0bYf-5=5_I^>;vQrD zHbVw|!W&Iz)|;edojko7Kh7mr9Hc#)>?z&|#(5Z&j!7yhrY;{HTiq;FP~Fp)Q8I3Iy`-I9)?dC zDWJL79>6f4B}Ig*Gnn$NEV+@GR2HKAY(C_!v14H_?KYQw4!P9*u>{1AAfeq#r~z8I z>{vmuEntji8Cy^VlcDh8Nu+C^Kg6A8=va-2nN$XJagNcuPE6Rh_$mk8@V=Af!S)V> zi#}Olgo#2ekIj`n1IE!1BF=j%(0#UJ;4j*x$=a>VHsbn2I_@ zE^u?P{5h(Gw_h)<$DwjwJcmIm1gDEHV*$&JglYnA6UE(Pn;uKj~KH 
zqQ}@7ztUO>(09CBCuSf=xWmot;gWUrAi(bUVzixenu>3u@T`0T}w%45r4~+^vm~zWDh`Q4Tfsbq=U-d;4&h z#`$mkYhtj~D1i9H&3%u$D&Y-x@OwddP09XVP=?$q%G4RC*RLYaJDm`3A~$-EYpmQ5 zt`u}H|8gyJ0#DsKYcYKs712bP+5wgS(g&>2lGL0C@j zrf#CNo9JbCCKKNH4JrLM3Ng7ZnN1y-iv&4In=(44Pix7aO07854UDN)Fq{~!Zg$~J zKoUbVt%U7K6qC1M_JAQc(9Oa>mY3mXaHG*2d~Sv_K{`-!C+kN-&UEhJ2YUef3oefj zI}mu9S@<9!VFzvExtst*w~b}Hv^Yg4Kk=XEivm~Q*mNj?=@}c3>3%OxZg)lgdwq&) zzZQ-Dch&tF;hA2Dix7<_j_0 zh;7U69Nd?H=|{UMH=$l1Qf+7;@zo%;m;A+$Hl~~9>_T`9=k=0eahIEgd~c>ei{O30 zTgZddjW>O%3`fIH8KTcqXD~aX{U$pT!`*@Cr%T1Q8!)A8W^JS&^tifd3IbWS%nII;L=Ud!84eKrO2tdExFH1??QIiiZq7EyijnUyOsB5r5C8Z-Lcbi? zt}=?Btze9|ZreQJp1;eslN~*|(JF?|z30~eCW=WxHlv8DvyPZ^*r;>(F2i(t1FGVU zn|E}Eq>lT#u;HtrRp5M5e z%HI=c5sC$t`&d*Cj5z|@6qGFP3h`^(0oC0Ccle=g2KXv*KcT1W;kZw9E^2 z?ZfXD=sf%%9wynH)B~vpDbp?eG$F19m+;hW7RqsobuCo;_8hB94DQeNg>uqG+POQY z77dA1fCCOH-a_Nia5Bu|dc#)Xk|y@JD8a=k#UpTk_5-fK)Zl z9w)7b^jP-Gz46{HViSE=ronpt)Z_FTou|`zIrbgMPLt%^O@`Z*J{aNz{@X*OX8Xfm zhP^vNVBCoc?PeEYrVOVY{aX5B*QZRf-CrYdO6#6V6y24aW3)od-O2Uv3S3@LCTw_P zDL%|K+|%fuPP?hfEkbhP72#Nq&akZ94RzW;+E@?GqjT{FxL)?h55gM7;2kAO&}c+k?41#LmSMHcUakeMkcY(ESWxE2n*U1NQ%T9v7G0ou>GMuL);h z>2Da!3ycy>{S`l&WU*^tml^=cqocb&yGeT?@^J&}$@s^>ik1XGJ9urQ8$Up><+N;5 zD8!3@?TP_P-i1*?$bw@!DWmqJ!anFlHdW{2syDhso)|6OmDS=S^nf3;!yMKgZ~p;Y z$21IYVeB8k21Y&cUjh+=oRes~=WPCwsf|+J1g-k%6N$-v>FdhC<0KzqJX2FP`A-wF zq~v7vb+?{)5q&QS(eAe}E*u!pIJ#r*E*9td&2RTYmR&-!{a%T_&>L&rW(24#K}~8_ zl)(2#+wQ;DKWEi|begxut!zqN-23uDCL1uH_XZy~V8ER;_aDQnHb_hP&z|i-CF;U2 z>RhZygaSg|4m22V^hRxWgR>4?wwnOkcll@B10$OXeM6fG%k2j!mKBY97sub?6F;q* zA-uY2z!fKs6^luPle1(5Dg@=&)hRbJ^kJ6GxINE*vE`o2aQG#1f7GqaxEa5mj;_|) z`FbLsWWi#lvV7~jxTPZn_I-2-%0f%uuWO~XI5nzyg1uEOyzEc$(Gln?g0adwA$ zU%im}nffUz*W+xj+r_#}*p~{3so0RQV7$;?IeFxY>7oDZybAx8`XW^7@9%gAX|fA0 zZH~v#rgyE)1E2_Tj#Rxqwq&^GL!)AWvDDnMGm6K~R$@ZfFa}tX+AzXN^nv^+L@^dh zo;mDYeL#t(_wXm~9k^od{)D^mnx)h~!CHVVqJM~0wHN9{x9FjnhZm>{zXcVdegO8t zp50kB7Bu&J-H(}{w6<3ly8ZZkj!LzZTmvz0Bq!S@Wn4qqTgx^ty^d%O9qJQ+P@@44 zXuqLC)H1QD8mF{L9BY`XY=66lhcp!4hF?pTOq7@YU+^^N;^*rx0X4|9$wt?HUFTUVeu0_Om5fb 
z6{hNli;;@6U%sbpbI!M(*5flTgzH+p+ko@g*mg*sd{~4$Fn9Q@(<=V{cI5TxZQiNi z$GAdQwmn`gmNU$H7X-wt64v>Y4EC4iT5{_i0|9|`nw1*9%Sh#cnIFy;?gvkbM z>-l+^^4Uh^?%D_v-R?*#o}9T@(zfi6100x7jz@pLBu&FawctECxslq~j5wd^<9@+< zRG3UGJ0ZSrUoBAcFSTW9p{jMSCB0U&+UO`Tgnj6Kqn@4b+#DVK(wz6SzsQ z^fPN$`2xwdf}IlD)Z&YWz_lpvFPDCAggxjBwSUzw!bx0vCf?ii>Dxwrv19~sk^`NR zG&ENq(s>6lZ*b&!n08g$GJkR1hu9g;?WP;tqBmW`)m3fjc_t9$H!olVZ~HH4 zL|4lN?lpC)Ds*qao%Ei<6+zA-^TUyMlLE5?g zWd0}n^`Sp&ipZ{{q0yj#hr=oJycqx6OGCS#@5rYU7INI_!*zn?T!65(=jS=^>h4;l zAxYOrq#>6h-9ML(X#XanoD(K0m70r6yS7M)%R2jSIgLbKPd^_14&vRVccLlj~i>?Un6Po(*xd^PIjU>FtzkdNpn2hfwA-tf8iY^nXs`4A1e##HYx7=OscEtkAnDfs6Q_{>RAZ z?PL}!fA>U8Y;wuDx2?{|LP;m0`VNmugYR<&0AkYQ8CE=s4>r9WDt7mI{odLIg%V2q z*Z@F;?5q?7fBhPwb|H*$!;^6(H70e(*B6I(G(eQjkXX5T2U`w_d-nXr!JQ~1+m1eP z_!GCz1S8j0ff{#|wkeIF+6KQe377KIcq(rEVJ#{{%wg#*5*_FNVyS~&!bwI?<(pc+ zdzqa9W|d#6WwNNeyubV3Wl%XN(h6UNfy7+h)LdDqAHyx0Oeg1ZgP59zW1=p)3iikA zOwh|^OG5db=$TgDEv7!Bb%=&*a)Q_$$r7__8fD%Y_}Y3RKrq$3xM@9{>&oUdz#HQg zl)U2R&1`eg+^BgF$zyYWV5|Rww_nkY{Ki#B^S!0PHghK?*Rq{EqyK8M?l zrPcOa_;Y@A@|*Df!%a3St;pAP?!Q9pP{EO67I8pFags#o{=p3+9HP;G^+G26{n1&4 z*I4F&V?!>22p!a+KUy?8SQib_v{5Eu;Gj+0g7AEuj)XYFLU*EmpvV0FTeL@AgbIOKdNWO5=MjW zK$i6LcP;{}43G+$cMRq7M~TGd#jb(9;dgXrDA{H6(xJh-x}*k~w6r(5f8)ZEb=a%t zhy=N>TOFbwKzrviT+hR9-ilOlzkbfe%TQCEDg5zCviU8GG#s8B(wb_Vu%ikj)rp^~ zg5Mlap*{(&2NPZ}4lq7AceL%lstIG?VLWLcn%yCE0dn$gPSn=W|1{LvV-+ZuhGviTdU@Mi!Oo$p|j{WIex1BRol4n6-)LuLpxNTeS5PZwF;^yBlyTerpRX z<;$VGUO>KLzl_@1R;w9v?w?Ay8m+5^ifrMBV8e%uBh5QYa+WJK%*CN1^3Vs};6b@u zS4Bskn$`b$j_HVi>Zntl@&zD|Dj%O)CG>~6M%)!^Z;v!qpgE?&Y6E_VBXJq0t zJ2=j6_RHd&n-xE6AcM0_hW$BC_J14STXgxi)|?OPja|lyxad~BF(MJS|Et!JY*FLa zXn~A!vK^4Z`}@l2&r^rx+6L4E7~#98l)F*^oL>+3Pkt6}gUeH)Xce)Of)6oWMfhJP zAz)i(E)EOHK@145K)W~&BlXfpuA*ym!m6hOP!=7c zvT^${i!g~dyifj6@RD1ZM4igvxdip&SfS5bhRS~13e9r|q&npn;w%4nwo>dk{02y^ zdFhom)t#ic?yx`6CqJ^=rzkB@foZ(P07L8(OJKr^fyI_{s7ER5zY+<7S;0L=>R>i3 zk6!^1`w6U?$wH~GAEXxJ<+S<{ziJ0}6kICcrl_L6u$!vRmbV^qP?eI>k_tua=RSuf zbV5n^1=Z}aoT_Q616Ho3e*L-$C3I&?l9;D^f 
zO^fIaC7q~(s)eb>2qM4u`tRVIs3JBgdewgg9eoJ#NNXqG)ekm41edYDdEMh4bRuxI zt6(@8T3T?B$WG~?Sv(sUsO*l6x;6cQHz<&j7se~rGH4L#mLvk>%xEaOV4;nbC(>nR zXd-XjFQqCE!M&+y8+ZwI#-ehT<$wq~2XfV#o`#Jr4_@jRM#bKD0Ag!~zfPIogkuvr zT1TBf1U+IW<3BilEIi13s*Cfzdu{OYeaj}y(7=|HP7N&VFdc17q{FzCuS*qE zeH<^UHsO!`&tm8Qqv@)nqWq#Q-O@31mx3@L(jl$1w1UD6B_Q4PF$ze`5TfJ&f=Z_} z2#mylNQg8dDIwh*@8kF0TkHFG*1BBoIrp5s_uWSvyBP^MpTukA!Lz%4(9TfObg^fM zHDUkH7W*TRWzBqB5W!}y7kH+el8GQj*t4K>h^+^&W)5f1hbBH9l9?z!mDV(}s(8pT zanDy_~Eo|$6`8G3Hi#Wu5FR5LxE*s|P{5z&3YLk^TAuyt` zG?x7!^fB;*2wR$v{cU}kmGsp^xv6p>unba&Jr_fN{vZtY5Z5Z=~sVchYFaMnLM{OTMVI*VzfM&ka6#N zY7Yxb4fM4&$B8Ex>yb?`R*qM#RYRRsL`j2w*MDkLO}P)RoSw04J}`T~`?AIlFfm}~ zS(4iDK42jx!j7D>FfN$b>5oTq_XH^X@b7oUIAGHp`m#ZJ6^ZngCmP!sT@zv%L?4=P zh}z#a3WNS7?&0ID`DYh%WS+iG!V20qsfQwiXP^VA{0)CYvLp>V9C;s%Z}X(CXP5s5 z?hISl92pnyeyHS`*97Shtkvo*yE6&L*ZZ@oRIa{_ce|oX3=CwkitwRruWU@3=J$H) z*NJhy(K?2B6}opdZxLML!^b|N^Y@w`x=#(aZ1EN*7SJ?vB`tl;8E+wx4sHp5S+gXg zKf5@q9dY@S&09C-KBf8U?zI57ajF|HkfYTcZzfG-iF^C*oh!RrH&6uUq z!c8R_;EdSnj=~3;39;;!c7~*lkWG}VheENplgaZYx-;FDn9NhTcw;4KppfDx-CQ1kg;&iedFBGophAd1+cBo@P2+_BM<*a8m~{QaS_zN z%4rrk$b-G-(j-T+)K2vQ-SnW~OyQQ*>z6)IS#uuVv1009wPifqn-#Yo*JNAY8Rv+) z3>}YknW#bRr{MZpH{u@V53)n51%Ii;mYx63@~|HD7BT=aq&Sa7j_wz_eNk5FT>-4Q zw@_bX-vIX(TB|GwN2DLO{=>!8Mo4Eds_MDB3q*f*+R|*#Ls6LFq&%e8p|Qv_eVwd6 zkJQwZZ+qdj zp;1r~Ce841o6wBPt2%!^3l{;qtI24i9_?J+yDF&IVJVq5o+fuBLlflDggBv{)oFJR zaEkVlVIpPk|220JRG2i6cs5WLHN(_0blErU9Ht#$FsI%tmamkw6M7o-4nA#M^TI&d z9}Z3^jx8V5*q@Dt#towDR_A&El)RjZ-`qABYF4OxgmH4y{I^UWx4466u z)CHg1(8@N^e!z!nlt_cgXei+yA{)7e}&Eo8p0k z6Tmpn%D6sv5N3T{bJPHX#CIm8=0gyzd5}0S=3K=u9r!Ey1WID}^3DL-{ou5i8Z1#f zozBOMlx+GW>6sy!Yt99NNLV_F6~TpqxC22Y>C(viPYd4dEmrIpr1sZZ>okt$iuF-? 
zgwO7y&NR!S3eQ1mmT?6?x*)220iA(;aU0S0_HISy2ocO8w3Ar73b zB95H4mrKx79i5>Z|3&GGKBqag_PpcQ2*RR-Y@5-lt<1;UR2G9-zFVv@Mc2OuI&> zEGO>Z_6f+M{wS?LJ<$*GmMD|T^Fk{nupZ0k!(kDxh~76`jeVkVZ;V`n98+jv>9`-0 z63%c!9W2*gOiSXDI+k%lh95!KUnI%=(k+~{)qM9A0-c2FB>?`#-yRkk6(?yAA0yNf zNp@7V8|tK6)lr{KKN#gYOSi;GrAFHONwOY3tN{RIY>aA`vb|Ek(^ud7A;%U`j>+1= zx5$}~MKXor-&sY!WzH4&($ZsY)0zdncGR{4ryUhY%NZGu3)wkL7%e;*&L!B2vc6Wt zW$3#6xUM}pH%+$1`*rt)6rcHfX;RkXKEu7UYofXjY>P276WJaEOM~yk;w6brn4U{x z*S-TPHi2_vJ*Rb@cdxG4>txG}@BUHM%0+#7{&BG5Z$#gVL6t7%vQMu)$~{G1kafF@ z4^(7uMezj6!TJu~?O&eafiGf@?W@P%BnDCxuCC_&SMh^xyijomnh}y6C(`w6a;XKt zq zHQSv(JYo4?mJT$XU@DCA-;^7%3RqsbVl^aU`Z{RWd+p z95~i$S&s8(kMm{sHoiEX27p?GV8~IK`aMkE~#P6RiFL|35 zAHCRn%m)QhWgjK*D5lU1tQ6LZFpSM|AA~$6Uh}&Z$Doiyuz@%OmSB2S&FKZ}6S?bc zf?U=gw?d{aF^ir54*nAf>@!r@j9-a7lnr_njH~=Zwp#!bNZ9$7ti*4ln$IXCvR3(- z_sQR zRYvQ3g_M60Qv`&TPf3-g2W>Wib7f@a2X~%zG9SA>Xi$22&%-UmDSq)C;=g#rsv~zZ zXJD2T%}#q&^az~6JlNn*fl*q_=!yA}_v{(NA;b_poHOVo)p?KCV{{_s%SNwFr-a5=P? z|7&LL1dfj$$*#n|x2!2ny9b5Z1zDXJttzwZ=fqeMR7UC5_+J*~`vW-!r$l&Q-`0f_JwHARCc; z6`z^#x@iWeO0@#9ibZ#yZ@Dd7_vSJl75@||3b3}x{WLC!~UTK==L3&xi+&a zF`gwl5nb9il=|T`N6$BajvRi`e`Ri3wa4)@8xPPAriM=^*Ka%Pv|oC`g}W72Z0Ce+ z<}qovik`Q&30C0WfxwqlU~e{6bzj0CEoP_HM!o0I2)tJu=)y#)j#oJ==G0mgKH5xa z7`n!b@hwyyn0gIT90~$Jo@15ckdlkEac;(+uBNL-lH&Rw&TewDvqWLU7q?dwG`u-H zAgHb;wU!Eh{Oh}NTm|+D_!szpN{+FHEaW79E3{xG5o<#cB#^@`?(oRxrT$RPf{bMv zKEw_gm~tT?B+s&vlZ$Q|BnMluaF~2UM6EG(@_0q&zef~f)%rm1*C%~Jyh7$B#jXy{ zzBtU^AFG83?gW3?hUyRC9I&Si_`6>ppF4 za_#kK2Dki$FEyZ8n6Yd0oM$;`ZoJYb1jh<*gBDWF{mTTHO#AVxMv>{{o}5It=c8{Z z(B~Bj#VXp9FCIxTA|c7W4UP)VCnOAa@4gdCk*HMHAkKA2@h7+PCh+<`L-O*(zG5V3s>gWL~141-)Hw6r>iCA2WH6MK?dJhq{q+MX2)xNkE!+E+4Hq3XPWgNDJO~ zR$qO?;vEd!f6S(`sJh}EeeQNUQxmKD#|q6ywu^&81)uTqW;D6Zv@t>R``Gq`z_P_y z8oELo!TL{Sf!=-}_iVs@M4N)vvg8p*k&r&_00laSXoB%ihtv-h8)3ETXE43+YS|0c7z!yv|+4EjF~G!LAQ`Y!U?%>>`^5EAF5y|`o+ z2RaAEC`$3Ctvsw!5>+^06jjR_0#@;h^d0LFsNfWBhUot*IcsR1d#7%BRcG#Guf2#LiU-7j0pY@IFp<_ob{UoO;NpbO$Rv&%z=UAa zsJ=hvb&H9MUtGsK(|{eiQ>aI=`!aAOOb%L5jJ~ZU=cX%kd(3+5L(5o;jA<;Uc;Sn~ 
zFUc{%F^aSOI<&0k5%C(oHQ9+2=&?r-@MR9P%z2SLgi*BCl4f_>4q@Q4YfzdR?C%vz zg7&t^!bRSCt_E&R2!z$etJ>)UX&;?jr|A~%P4|s^qG5rznrCgEO^r7=J}$u*iuPkP zbXC{Qnq8C~hmg3!k&Ubt0xNPYyDVqTQG2bwd57cWS`)N#FXIij-~uM&Czfxcf7vVr zfots~iPC=%dQDVxerfDY(u?AFyz})n0=$-IKWu*-POKX_C#<-6834#^wzo`#&amc_ zd`htS*KqG+cOtP%3*^?{V%q)Y3!aV;dHkp9@u9Rbcn#>*^y5{XX(YwZ(u-beOxq7C zLCG=43n`lfutR?@sQIR^!uEx1WTmHp^1f{5Cv3vCNQ|a59EgRA?e}M)jXBbl?;b&&pZ|a7#gVgn}guu9=s@x zPj|6?Bzj$F>?w1hnWWnvrRQbzfM^FOY7|f~ZqGmk$iZ~8Dr`}pgSd$_hShn6~pXysvndycwE6YGmQtQimUKg^M-=8&%4PblB z_9TL4F;x+S*H5fdl}MUbp!v|2KNh4<3!xPat7yy`Hm%68wc2z;I}{cCEEY2CHmeLJ zIuO=i6@UT6*iI|csXw4`x%4zs1D*`KMZ+iGGhUiPJ%lpv(2ztBki^#g-9T_HGJxz* z3*<{~H|IGvO(>31&f$y@Tk{X7IKXm)qQ3`HG_;~U(2QQn9(N|Uy4uvo!O3H2o}x4gOleLtWt|oPO+OHhi>^U#m6NS90k>uOleZ z&T^ZkuK8kwZkd$>O0}K_eL=qWw_Y3``;$>>Tn0HBW-s@ldvuS+G4mKw6PJq7n;WMX z6G{VL#z`UI5SC484X@=?G<_@7+WrgZfw>w>!D>Wd6c2Et@Y)g}(ljy@GGq`=*?MCj z9`CLc_5!NGrDqc=Sle7wN@vh_hylYS5a%B_#0s(x@V*0Hyn8+S+hWmSZgL}6Hs1K2 z8>mG`#n9}&)EYWBgOSnzXujB!!!;2!+kP`zX@x}x2f(@C`yY@>OM;L$Q`fiN`BE~Kw z%|iOBpj~1)oBttw*DlSxqpHqV`Ryt044`pY)D{h}22{iZX3j$wFYc{2W$ax(z5kqR zx8@QLo}zio)X(B($$FYP6qKxXQ=46F;P$CaL{Pn}ZXuIWUupE;QYUm@C&apvu_eJ# zW;;T*xRX+eMy}SZR=*JsF`k7l=5*nuhk;H}5~yG$qHJPDFhNjygvH!UxBwP$7caCA zrmvh{S%ClLo39`%kluojRO`tlmvQd-PVwfyYtjkwU@^9JRPY!Q;_WgZRVR2cS%`P%!`CJ6GRjY+9I=pUwzaN*BC;ta zog}#3A znoQxB2C!^q#d2K)#e26EVi^#`~Qcv?#6fq*8mYmbX`QibQd z=hXkbU+$@db~2AN1hNam`7;yQ8~il3Ea6QWaM& zu)<;@V#LrpWpy_$jpxnnq25<Y^&;CJSKdb#u}bVO3u86AX5;~ z7-+m>3olJI@{9$MOAa}sju8Lh3vC-~n?Sy+Dw3u90#vwnHDP?fUC%6fL10iDU297<)+7UE6OHJXK_=1+i$yhl>}#F;A1cW&g=A{*k}mG(JoC(rL8~<%d7rXeA9j!--E?DjASY3 zC|`ClTLUqQg0?2z7_8Ah@E_X0PAt@mW;TrXwcm^J{-btt$E*qdJ?3mbf2n53v9Nb< zUIzCnD#Kh&HhfZYfZ6yBEQWk}i34a4OhXweZ6v>4^gy?^E7HL0z+UzLBY|XwD2(7j z61GunGT5*pHK%Ih2%r9JqN7rJJ0x~5f}9BJ^I^KX@gdKn!4>SOTy$LimRKW=5GZjleJACqT&ab8*y-B&p6TKWr_eXj<`pBp3a`b97d%g4RVsG!m zo=+mnN~anJBC9om^*p?=5=PelB495%@R{|;4X!8J&5+}f)5&_~eD%RtZYA8SEI4Yu zCoONym2g8%aTdU*;xhh2U4q{&7|jpj1H$&tZY=~|t)0xQVPh}-OgLvLL-5!r*-*~3 
z^+aQNY+03`jSRk>X(#mlmn$|st#56(A%Ek_B(}b2eZrTL@}tiQuz|g@z-)>UFaf49;T}7*lUSDy5YX32iSvsOd7OdL zyzU!pugQjo%IqUU1Spe+(vzya{Mk8(73Vnluzpgxo-_P0O1t0*n}*+Fb}t!r((mFEAmXYzqAqyrHrxeTh;@`;o;On_2KFdm z7BDdDZd&W>K?%l~&zv4?v_}nJKFB_OP(ns&_Fx=Y<}|zRdRvoarT!tU23r;2vM#0g zPMW%a{+UL;gZvGG+K`TAg^eeV&G7*dha6}QhL@(1O`!?#F4eV;k^{=tHx(TbLubVO z?XrONp8eHx`%6sPcCO~dvE)EYz2{Zf2*vyjkE^kxR4Gh*&EkfRyMDlXA1srlH_yNq;#l74`is+|l;> z63;;)n=Q<#+4k=@8OhCo1qVXKUB9fgqM5#r&(={?dc@}#d_Tf)o0!{uq=APER#hlw z4k>{M8j&FMRgMM?P;Hi_X9+_F=jVYWWzZg3@oObY>+Td>vo$$00taCmR9E7T0aE+M zC3r#gU}GdxX5eqB>LEyIYGKusQIOz!Gb>|TY#LTsJzWjypxNLO_5N2m&Pt&j_3u?R zuc&yjHFEWgAb3zqmAmDNDkJv9KMnGMjTPekAyP}N0pt<|qED|ZsYa!+f3?@bN5!%v zDj2Ju32wfemb8B>^h<(B(f#Tc1mR<0_CU6lUr0a4%g;`~l5I0R^JdB>+MxmOcZ1`T zPjGM5)A<^y3g(ppl@?TyE>WuZz|&nb<`Tw*z=bshA};tC_3_7Zi-K)0B@f8#2x}m2 zgGk^#VD~Ev)IoTUSyz6u!v$>ZN?jf$SI`93T6v_`slNGv;_`A#)#{_)V+WmF@=22!YqoOD62ATrz z6VkqO9jjTPn`>L;KB4O2_oA!$cVBu6!yZ8fEHM$9;om17aPv|uu)5Mhr&&nX8!H<9 zlmoC?H6}w11z=mj=xEEty~!Xuo{wQe>_4CT4l3AxiG$)wYf`rOKHFuj{^}o~dn79V zII%*e0Y9`NQk}VYK<3x|#R{D1*lc^hPlu79gmt5gk_*3Tp&|F2bd@zz-*-u} zCx;adQ)k@spgYR`aaE`UhBW|dz{)7&$#7}>|G?m3tp;wGu4ztij=%+lC;KUd!c^98 z9&D+B@AXw6vpqV;>L-_!w+1q>euL{a){O>U-rE>en`6bloIgDsklq<`?W%VEcX>7* zlKj}lC!jN}{!v1n0xWFSOc5?GPBePRf$xe~i(6I?jc&tt0|;hZytZD3`;tWud7=+4 zvKrQRZrc<8;sfxotnit<(0;9~iWA};Bs8CH^cyHVdiU*VXN->9Ge0g-9sxmy{S8Zj zVbIhrsZQ(@87OZkA85o<$^uIKzOl7}$MA7pg9rN=TBe;6lNq-k(m%Id+Ew~AvyeCD zzleO8=futl@Z$dr4YwaUboHH6;S0(dhxjOMgvs7S|397ScYwC z%7M^99IKGRllZpMCml{<`9SkgO4u+nc(rf-urQIY*|6)L!gMlX?RUv`ilF($=kN7{ zN?j&kpGu(B^s;Z(`xuJlCq9?cN`(tG|m)F_zhG6@j=)6S?*r+ zl@5O=Zj-Y3!xbaKa4zm>a3y{6a`A6{sQaqidA#yAxL12h9;B7tHhr3fMaK;AyFb!T z@PM;to_RO9g}g?1(i`qYX183;vBUn(YOtMTPJ8ZLgJ1&harzO(gZ}38A*EY;rndQJ z#D;j1tSibO;W|uVfH$7EJuEU!I&B>yqcVSQ`WhVACiiJB@HfNy@wM>e_PjFpv`w}6(w2ExRLhEl zVi48V6BsxIAXSD~2C~Wt@y7AsuHyp5ayPOc(BKIQ!!eynkfWU_{nmJU%mFZuSvOP? 
zJh+e|+~MUrn{~6mQt}!p`MSQc2;v?2FpaYRJEM$Z_Uf)%<+cffobAO;ZN(2(a5wZ@e@N?fGg_QYXq#;m`g@ zz>yupqYZ(RkMOcRr3w-E09=M}<>Heg=!RC5Iz?7xbDdlKD{__UtnuLSAAOH^^yLI{ zS4P?hu~y^?Ndn+zoe7QODMPUrBq#Wz&H2kqiYEADG!V(>c@`|TOJg&iYeU70ELomz z9Kh$!*j)MuOH&qCjH{I-xMFBXD`+{nKh8)YJ%e$~Q#kZkX6_9D1~Gj;BHw27Dud|P z(dPI|@v43Ad7{a733s;3KW4u8dA!!EZQ#uRWi;LWbFu3H`t6_j9fo0`QZpeW*DXgm8b`Td%+i67jOyl%#BFCiP>ec3A`Sn#5bmdeL z3#uV-r@=9F?bC1W|5bHO<4II{<(q6DqsiDm0v4aM^iL{o)gTyu+{YXx!Jn&W+-JgB zlqj7aNys#W*}Q;H<6}gZ-Xt@xOYV^Ev=Dg8j0$2``+)z2tfH-y`8+O4FlmHUnNa;+ z2zXxf$*?=7+|$>&5ywi}C-?7D)$of;?|3oe*5QJ6)XCB#D>#Kj>;;b0thMW_MCfn}?LK~dtZw2FXkyc+ z02JD;VWUswD*IntzmopXY=6mAYKYPQn5v1(y4I zzegNfsi?C0=Pmpk*@(zyE&oC!r>_?PFvn;C(&7#z9)Yke2bU(Zt3bQMT7#8sydN@&$ zn{~o_R!kjL>&{Jx2_Tf=`E%>uGjDs(&inR_g$bxa5SZD;tHzmkd z-cmTnRYGpyozUdEE+qlEkr z7{vyYR+Of2=|6$C8{z&(xjDBP# z83}tqg|~ZWnymIyu9*P~0X(RKqz0@kwQC~Zx%1+#mezn2LCnMxAD)ivuJGy?ZOc*x z{xC9y5&p2iysqN=0!hZ^!N)=X~;@HUBh>lPz5jPE@K?*W;LvEb% z)(OZRos}?9j8c&CJ|lRMd`)r{;jo z;#>H??;@DURrhIaWRK@s?{*T7XS9Z4wygc1_A#eH+sp`;8!_9>DkeKe9-D<40C#L= zzQ>C`WibkE;DhyP|5~Osrh)fVJN+}dhm{x$NN+gX^s}*E$nJs@Xsc~Fp#fZ}3_xt& zvxsJxr2W>WsXdQ`eVql<=3za659YrTyXk%U!)^cSBStd=T8+y9Qc`5O&a@(|IYbHk z&+ZC)61^NwVb@MpcpcFjyvX-!++VH?uCTL0wBjbH-}wV{8l&>(nGcJ5ZY(C5Y*9M> zWQ^q-&%dpQ6uWk@D!1DmCI;Q%-`#ECOJn8BnSmD)32A3f?<^O4N&N!uk_{nRfhk1V zpiA-e6JR6sb(4#`0u9t*>DR|XrW`zg*o^Q#-c0;S`r#u~&rM!f>k^tnM|)lxUH4IZ z)&w1`VCd&$X$5~FXSkWH@GDfAsG(`AI~qs{fRpM}c(NVNj{Be8ABU{ShvyWKbQUQL zethzsT6isTynRLus4uVG#EMRg*D4OJ1X8tMp4AW6?;XV(lM~g*;dB?t7~pT?Nir|` zJ!qHGSJGr&5v6;TSH&?^24YOe0E5!x9fv}0(M|%J;0-Ku!DR!U;!gkU+{G})^k4h; zPGan7I;a{OK|l*D6@9WE1>RbZwx}2wue8v|O`$^P;Kl$tmo2H@9f>-gB>n<^Yz1x# zkyh)29WxsQ{6DfPB&ijLMbhRSm59m-ui|u^xMv zn2-VZcK`=ho14Qm%tvRazu;zL3*WHu34M}&@@m74lCX~dE9Vq08n-W2H>6Zx#(^0@ zwtm!AE=#ftVEhpeunjFnS1Zzm0n!4*L%5~U$VdQ(;-y@#^A|eWwwd0W#gX^t2=Yoo zi81xa$xPn;CW%#*!EEBa_k~)c8bAJ1B!S0`yjl~vtN89{DWs;`eF+lw+F4IoSTWD1 zGkWkPrInc@s;aPmCfH(xVwgqmqNtW(5_OU&nu3D+~K_>HmN4!dFWp&T+y 
zqg{~}jEA1}nb@A;!jxhfUIfrsMab~OMs$NCui!CQn>(oa<}or+JKh_LUK;ZDi}u$3 zedmz2mr8^QQY}(R?J2Nrx?n$d--aDNeT0FI%xW)-`do9;IvLncSK~+!liM-}m}ePb zTrg+^l+}0;?L+3KE`>U#Enmzbx4!dSk2?7#lZ+tJlw^K-FDr)wwMn=|$*6S%FWySC z#vzpmH0oB>;gbq6&r6AP#zTRq28oW46yZUegOELqC|UM`N7D&wj~&U8lY}xH0RkE- z`kxySmCyQEw5clkj(f8~xT!FzN^B@5Xe@yWX-RE;?Ju>w*AmuU#C1a}*a~o~#V8^@ zfa3fQry;sL69A*hMCnat01f?v}Q zSv-9AK~iI}2I=W!?10NBzyf<{MV7%^p2%71%?N8MM0tdDsP6yeNstj6%1*J_MEW=_TErR*w4IBKf~K zH=nc7LnP(vdfxu8_PFQ-949IDH`1#fXE+zO|Gr0HoZV7B~krN%h9EBchH^+1cSNB0J>tD6@r+-GQ zNBTkaA79#uF??FLkYm(2`z5~`Vo(wZu#>V4fD&8bGm3(lB$&ek~Ot~ z54~U(tRLbRJ7ZqzYt?3Yti7^-3yr80Rvof{m4Zr7Y|u+Fn)=ges%E3VG$sLy2i5~> z<*mv4w>@kZEPyZ6`^@!9I{LHUg4#rqK%%d9E#y|*lFo+RW$(e2EG2)T3T1Z@k^s!S zvCBGh(lm=_@XYfAeg_TJ7{*1!^W62_S7{(1w+MJFds(llE|jT8~4Y1f7J8EC;jf zG?4<%1-aCxYut&qN79@BzL%~Z?+Wzpiy2HfjpyM@%_$*gO1R^58x@8+Fs#_LZ-rIB zBE>!sk5fmi-?t*{!hD9S?i#34* zHm2d@d$RtJl{egop^*)G=<)z~d_{o{L}oYxFO~yeA=$fy9kovdN@*o^)lWC5@e$jC z(Vr>eQEbP-Hc1lrFW%a&Hm@Ahz&Qz&#IdfK@;t0ADE)T{d9xfW+XCzo-Sj(1Z!>fZ_AIC1Q3ND=~!OzU{>5)UV*O^+OBMDvRQ_?3k8$^;xb z)H-*VBO>C>V<5p030e0PM4_2rCqw7x55|7}iM(=oLxau6b;Fm*U2B*b{_bBQmxftY zhaq@=twrgx7enxzXWc$?=sOswzs2@d?HH~ab07tlMTMz^gNM!|9-A#Ydj&}N7TOCv@z9P zK@q@dNAAesNr_YU;Co}rWKbxl5j)sz!%TP>NpxzA+?T|8DkURL5|jBxM%Y{Z=BOp- zTz_!fgqUeW?poe53sboOr~izopHlCL+@_sowt%E_>$V^MeT!+r_*!`6d zhnGT;YU3C6I%)Eq)h5Bp70mf~M+94hz%w-grQh61Bym@mUzi6@6-%6$KJ1{vs|=?M zTUw82oej(g7o*olS3+8~csrst9Q6AR+D>x7ttbWPt?dtI>6br)SD^>; z6GRp`%*1*F^q2)}7q&>a#PdHzW%U5LVbTeBc(U_bqq z1^x+?RwR%-RC;UnQ1a$)1V3Yfj-QgN%&U_XkNToSVJBpMp@qio&(v%48QE7`toUCK z)2yCWOcI^0^I$JhHn3}f6ZFkW*QRTKe!!tj5;o6;VHG=_9bQYkv+-N{*kGi_f zUctoFjleAqA`PP5W<}ZFfZLo~jR9>3-y93PFGvWmVpJQNF^axU3ggTd5%KIR3ZDfD zVXTGk*k&EDETd7S(6UWv7FkqPQIxQF`SWI_bU(N|t)>07+7YIC$`Ye3IO)&q=Ji~Y z7wrvinLi4P2(9oY4$^lwkK+2s_QQtY9bAEXC5YALdf@5%r@;XE@2`pi(iR#5cyBc0 z6{9wmt?VT!S_~5S2fp%Q`CcAd(Mv68><8an+5PJztjHIDcKd$$Ik= zR=OuD9py&r7yY@OSL1r$NaiGhw$1;2=ad*= zh3{40>2i^yQ~@%69rLL1bPFff8(EiiJ?OcRX5cj%+;|@Tg06f$qN<$c(vZ_@C`d*5 
z+xm{%i>@^f37AKHes-ldN_}AuMBAWx%+7w{mX&TZ)*A<=w z(J%evB%O(f_MLeq1nb+p!KwR`dv=>vc71eszx*TM0PbNeZZ(6qk24N?Y~R3R8|#i| zI7&J++Jpgw4BZhEl#rRlHoz6xMnWnMrm~TA{gHbw=Q9PLS7mvI(N>Dm>Ur$#HhG4X z(?6i^YsLmj>lG2x8C8YxUPVOIltdpkvS?c6whc=M{E*3c+~7~($KA@{F7H8lfXY8v5yPaCd-kIJh-?z0i^?Z8r2{3W66JJw>yn|-7}bb>{H=GIQ2V( zh(T2;nuQ2<)T@l!YM``RNB8r5AP=A6+Z8D(pmasmb-@#R%2{X`s%WP)O5yH%8NxW< zPH6+Jx%Q$dCO;*Sy5ANzGYJ2=ePqNZaH3}%6Uv$R{MJ;Ncv;;1LDh! z1Hxj~3;yD}((O$mI(n%-*Xec+1Opj7|4F8cn8mnx-%bQYTPC*lKSwbCAS=f8-8^JQ z1(mD%d~jUJJ8&z|2)-P9c3A7g*{ZJ_ceY)6)%5%OmForLuZI*>5Y!<K4e%Y&wuCh~;Q)4{n4)Dtm5w>+lmvn8Xn zjIJFdt)kBq*#EDQ@!k(v!EZ7dk$j_GI?)`5Jo|7fIS`< zlAm&2#uQ6i^PD41S%XV|DF&*n?RjgRs`LezQ)+>%N_6xaJbC}ibCJS-Qk@SQc|ix) zA|f#@P-!~5P2Lyx65}shXNjikcEO(Vzg0I9`H0*n7l>&We&zko8hxTTqHbbZI>;~~ zFbGC*@6zXxiq4RgFB9yWZ((TQm?<@fOvkfKz_eC#UJys1a-0c2W@e+M)S^$ z9hrkkJISWhE>N-)y#d+Szl1g#b1~@s1Ff8*e-GgyOW*~}Wuxqxs~4B+r0kP1P4nMl z3NTfA_r>)&h;@~B2xW5eREus65RJfZI4r0f~Rb z0o|hhHL}h|DFQ?O`xM%F9V9_jtla$SB%|d|G+lm8WrXZCstd-_&c!j$?)Q?6KJf+J3pmLT8zh}oQwr;vtE5w%ad?qEi-K-$&n{`<35 zS1yVEY68RNbkal2MUw0Jy}+uVuF~V`#RL0g+$~k2MAKhYt8b9gBBk2~|FvqZ1K%)? 
zG>cImE`hBbme|Z{(5aE-AVMDKKOR8 zWY;r-rH^U~k;v+)$Tak3SfOSEgDX)flPvgH`ZfBo6^J4&HbGGMH!J%WXBEH23jg;; z_hAlkeBN}+-&~BJQ$odnFF08H_^%%7XnDN1acmj`glnKY0eJN9HqcssB=f}*CCrjY z5x>+1$!bt{>|<~@0phJt8JfKBUri#R-$XZRFQflRiR()8i`dyTLz#aoMX#;mn>FY4 zG{H4;U4o++NWGG0lqX~Jhv~fc1`SCkXE}b$?*xFYzp5WS*a{4bO?Ljum@Zr2fQR^o z-}ZcNz(e2$J)~uDOL<6K=X7n2VdC#LVw{y6XroYImK_Iq9a16gzcRi{PZmRnclK{| z#w&pv3*n-4omDL2uc6D!1fC`_nN`v{N2)n1e@NtGuTxv;#ws4#ihwNV(|!aFzb$2{Y7Py%{O|&t&$nK`bL-SHj z?^|`{tefrsCh+=$^^RtBa7qZU1~~dZ=s+n3NGo9Y`0|-D{T114f)SV;_8WIuS;Qu5(c|HBhJql(<%eu%9(*w4)m76L=p zWoE<+|CL;eGK7^XsEX`}&ywN;L$3w@+xhIyi7Xwk z^D`6WIq1Y`i!h=jypillwX`2m7&~<9(bNx609nIyK|i_HcTHerh&lTKi!*&@f{_c1 ziLnh{GdCXVuZZ5Be?pOd`IiA;8fd4s6S-t-ijvj=-(A3UiPvV)Qr&XNlKg9jY|cn| zTvA9q9r4GO9BN4($p-iLoRlA0lHVi>nWPN50tc3X5%+jz<6z`Yf6}?YMpg3l&~b8umOrFyr~|vy)Hp$A@njxe|nNF zC@idFQdE}}&2iU%h1{CgHhCSuIW>5xREJwK9ONc2px`cQ_pFU!a{Zr!mTZ4D>3S6& zaygSlI`sV=BZEN3D^&RdkRGGI(MkqT5-P<&q&dBaO1&TFzPHG1WqxZg+&l8%uFu+?p>ETZteq*kQC=Z%oCZ z63MEV5d4%SNl$R+1Jq#{4C7kN)C9X~lLF8~i#?V8pQ64xE{bUVn-EYy>5z_a8O9C`2pP@%$7yp^B*v5FB#dJY?XS*~ zI}oK_O}+KqO(mH9#Jp!uddx;@@#shOqqoB+LLZ6i1c^Dn<*Mq4y-0XYkLOOER+CL8 zL2CLCZTtr>1V20cqAB|g47M>4v)<;*e0FP=@WxEH7njEaC7{}W-SC_S~0Rcg1sS1d5b zGjaI=>ls@K8N7GOAk0E&&$(ftbCp1t>xVlr$(ig49*D^QB-Xa;uxU1d! 
zv(!h7<;I@k>ua7_&}vJn+c`PcH7v~>7TzgV#Y5K&T-v~%EYQ?#p0gbxw11p<6Y40O zyQ|4v5{-)Z@@?Q7F&yO!A#;li9$T4Q2v2gNic1a`MtA<2k`-2iworaqBpBBN71$%XMvWlk9#T~$on(Dw@?3b@+rdZ zeyw5)Pc!=k!o>5E$@z}@YOx1fw!#SR6Y;uQj(&G&jEeJ7UU@YY}|!hu|m%Fu6pd#Wdo z#I=r2puIk(eG!LMY)T7A&YC*@gsG#Zk6JD>h*Q}FV(-$ksM0R!7Z*KW;da;lnUL}& z^g1!;T4U~7wr%uE%cQ69+{*0C?E{5m?j7EkCdG#o2X>w<)tCO6?rxK_>9T7)fHXzq z?M-d2tf7>nm-oYzqi(cuDvFPHG_9Os24sNJsrcQ^nm*xAombB~DC`HJSMJF(yB5tUeyUQxRNPeQd1RCle2~=k>)8rh0)V!t4o{O|Q(RF(0sH z^X^g`=bdzD_Id9z{dxFT_Xu=?Q_d0!-dtwd8IKw{eV;-`wPw+|Y{tW$fRg`B{l z`v*?RRQz=;)qzx;%sjx)SMjBD@NJdrR0n$aqo;%X{`O#nTUpXr=O=S;6q=|dk?r*^EoAw2CFjo$c4d#qv#`j|C-ot&*{99AHJAVNe|L#njUNGXHXJq z=@ZdMf~K@pI|^M)_Ow+hQ!H;TT9~FjulGKqf(*P}1=?h9Ax9+ioW~GFPjb3V6QHxn zHsvK%*yn-mg&}`GY?f0BCH8@ zZVA@Tj}cX*rKN|om-Sflj{{&e4oUQ7JZ!fcpXe-h{k%=RbTioIPD%Jy?tU}a5kCFh$;GOsS4^e;~$>H=kjIkb$VBburX zf+Fc_4J)I}!glr2ed082YcSKk@*S;vYsDECE z0-A5fD2PEjezyAfmxAp;Z@n+&vY8O2XmA+R$W0Odi>bF&{_c6d+qluv1!;M#H(8Wr zx^`8MK3fd`o$d6uO0i7+m?`xWi@M}#rl0s)TSrd-Z!!Bu_m8vYc32&I1D+vQ>@S>7 z9QUS~_Pf>~D%2rCk(|YcbX@;!9SU3EuEp!RhQq<^N>?eH%d3^_jxi%21)qCm(zLi> zQT4FRSUfg_k=W|6M)~wZs)N*?=&otIaZI*DS+JMVW)NA1q}@eed!$pzCDO~VA>T|- ztgSa7W{L+*Y*StkGx_tmVZ-gePHu`7<&@iV4BiKK<#*x^M;{!G?hrrh5(=d^kBg_x!;yV0kB#Y!vYtBU_8!$9^W4TLSvW6P_!1%=%UBMYn z>`Ymsr}pxD<7Kudy7J=KYx@#!!>eL^_Af0q!HES<{&gXW`O8-$kz3MQvBpGqf_~Ee z>5KO1KO>;?dTwWLGk>26YeWPbD!Xn)uB&&+TSbkrL3PmC62_jYzA{2zf!2GUBXX-k(Swi%_ zF>%IPm-UUUcFPB?{QK08o?M_##V5#z*BK*8A4&0+{}?%6mkH5`n^feq+VycF6=Fa?SeB8CsmFYJ7>ja#$GYn2Fa(k}Y&ynh3kZMEV!TV_;Q)GDazjf`? 
zNo8H~^6}a_{lL-}wMD^bwJ-Xeq#BYYb8{rlzmB7h6Fi}a=a$Z~7m&%yL9@$mIb z>|HCnrJK9NT;D#+4~6v1XqL^zl%$Rg4hP2&#*sYly@%CbKcjP&d!b3QhgEqR8~A|7 z`aLshYzE4<&F%s}Zl9bAWqtc2r7kjIqOlmgC;I8VN!XfG1Ihc#FN}LLhI|e5{hk|I zKfu5FYyy&2rcn~EIoa-1BF$1%+w5y#r}vWQGTYy*VsUT=>#E*o1=En2T)Ga;gNdK` ze1<-ZQ{)=5J@&I=EdCPTOycv=rW2#!>fAJ59-&i`-)MYk^0HjG|=kw|W8ebX3t?i$=bL9^|SN^Wl zv^l23ws{n@X+L&ulGQkfVl2EE?z0CR7ZZM*h&Wqc_LO|~9Hdp#kUz$pD(pJ%-+rbU zRy!!n>7t+7-B^rcoT(eQzXDj~+-_qpTNt(>D*Fcm792Yjy4iDOmb~H5d|Mth-lqX* z2`sY>UF_F&y|iQ1PIP6TEqL4|U&-}treHF3ZG&&0#h!bvY6$GS`YV6^(3tUHkt1JF z@FaC_V?&+7Ozd2I^}{L&r@I?Tmb|!gKGyepW=AdWn1C=!o*(xXm8>=pd!^ijS+08o zg@V~g$%2Pmt%2{CPE1M7{GKme10F}W>u7wrH%e;H(;2>^a6}-cF(Nb1;bn|4n#(GF%E=M0BRSab0}0n zY(SdL9swf`$>7PJ4MJop(BFtLW{~g(5+Gc~BKsV>#LsMI*xzkVkiogUKJ~?_sg+Tc zzW;D+uO9P^VmXNS^n3A!3U9AvB+Q-uA@*J1ZSS%C^d4|&{?v&oO%6p3D#RB%H6!7A ze))JuFJ4hdE9>e`nyNM6tBxQ2{ygrePmyU(Q4wq$z!0#hyhdZ=! z8ZFzu>8DW?u!Y?DMM{8c6s z)zmN7vf?VG8iNqWvX;u#iijl)eW(yP`bhfRWVG`$X%*L*nNSq)CP)a{LZQR@mF8pq zTE|Duq8~2_Lj!LBA|^6ld{z3N{Ju+7m*C$E~ry2YSk*X}W`oeN= zq^ab!1n3CqtNGi`UO8jhm$&$S62)DOsBsqJjU-Uh7BMhKS&&)K>FDrVb8=lb5Hu`f z_~KQ)IHq1nN)LMrSiBD9dYAm9=*AnZ>xBtyx{`rc{sZ%6O)X;H1GV0g9T@aMi1^l2 zK%R`O{Xm~A1}6t2h>AqOgaF?5!6<3$<3tENpiH?Bxg{^+#RH{35QJWNaeB73@Id3O zSgMFl=`Zsh1=2Pew7Kyl@>wb!^o-Rz*1FRylW9|Zzh(h&BoYhX>C)_?>E5}vlrALp zEPx*1q~!d5O5~W234Z z-;e#_v~KH^Cvuk(;@l>Py6d3(rUaJ%yYL#?25H@oHl)^(33=3tMGu)#6)3^JBu+?O zw$b`$pJ)IB5CuhbIbGqS!lm4?_1GetO`@uOzD84!UpN~pGN>4Ks4YfIB1yi2*#n-^9IYt%#dWjDtWaAl=dq`ub=)KlfjcW(lUg#JMqz00Y#P~Q&{KME4G8$R zuxp>5+D2t`7$ zL>n?wq4RE_(y`8b?a~H71@C@Fkl3mZz#VFXtW)Nt`fmJzr!_>{Gv4?5uF~iJ!H`#n zb(T49(7ljZSbgEfCS^cX99LJd8zfg|PBX^%$WG7NW%~#f5aRafhc;J=IjX2Q@V*0g8q>GF91Vlzeu>?9W{$DXc9qOIl_~1gG*D;- zcz^*8rcM2CSAV?s6N;tb`eJG<$-@Kh+Z;nfgwzW zWY2e(Cg)DwPsfnoj=(%zFzE57cSrVxS;FUw%GKErIyW91Ruyxcb^le$=&wm`Y`OsV&HO>f7C233eIWQ^-?y71Pk2T|X) zrJT8~6CVM(8tTaK`JAQVLJ*2s>(X1}@s)krEHI^@x_^1`xqV1f_SNAsL@2;55o14j zDUq!BtjHlNq21CEnSanIQiKUs-@}KHb zX z>!bN6pfa>GjXt#3+8)&^m<;?S^5M|WBrG^()wHm~J!_2K{FH}sQ+7i^vZl6m)9mHB 
zAHck&j*-mcYqUt6a;=KLsI%+bOZL7}NIkUQ?-0sgXjRZW!;NcwkJ$I8Nl0$oS-z)^ z3y8)wD(FytsA$aN&AmZu_xLe@*}gKodw@TJ&eos$T+uz#eA9Z{k={RfBVRvm|- z$L?YHXEX)Wri6Fy1h8KkSUM!$?E_fjwO74t0d-ypx+T zB|Q(s$Bn18d36~mXKB!W<1eF_cGncG=!^~ZQCC8awN+!q|bp3N>Y2>{qdi>?qL<N<>Uy$1~zw={UWVC*~t zSjXi3``Hv7N>*H1Rm&z%New4-Q3rW|rmBbnT!X9qN}xsfao_D{SvD(FHo}7_Z4G?_Xv= zHGPo?dAd?64pc*FPS~m}ke)Yrd33MBTqwGx>pTY4MtIAyt;OyGQxjdCbGOf4A{(iG zaD$jRK?S>C?t{9cTF7%bsowo3a)qV*L?>cfgd0d4DGxnx2>BWX;=dR-^0vGdJ^b~{ zt>^@@G(#387jg(Ek&3}A~F{x|=G1vNb z(naeiO&6f%(`zwQwLS8p!AUN-vS@uSb;M)@;akh*A#=o@1kW3Gr@6slQpb5ji^)*g zLpsRjdr-iF>=ax~TJRao zhuTdWc|m9vqej({jc4N3am72{0VY`w%jWKWeFlhIqTYcx&L>ZlaARq{!n0yIMm7d8 z&#w0Dw0uREZhmw8_gcw}@puqEInOyNP@yU?+7nwuLyCQ{ zf(v+c7N3=C0KxKru!PN?`KzOM4B4pSVo8P%hTgq}t{J3+4 z*dMaz&Dnb5dx!*(Yh~9N|Q)9NjGeB9_-6zn$F%!_NN#qIzrZhO!)kF?%Fx$ zBTM=6XgyX@+b*s`Nh7NU_k)lwsOF9 z9Yo-McN+e@VU)V0P;0_6ze>4N6ZpMtOAW*{Twnp@UsR)u?^8_2u=1! zq^(*waM15e<+@-qrSxWB^#~qwvFQsr?_-G*TnRcwNTbEcITHmtlJhY&WIHWR^gBIk z$>hOhc(;0{w8gq8yF(btV*wpkl`{q7#^M0Sg+Fts0x(s7w*GVe_F3?&@h*+P7cYP3 z=BNSjXU;#vgg#LigZo)kYfqT-Rzn5kdeFPD=i9}%&Uw@V5!{-)gh3DP;;et?hOL4c ze-!9Dt}RB&5ZkCvFA#<@IL~%ueO#M#D*h@TjcocoliJ+1_^S=bpW3BEY zl;QV!N8`eQz?V_v@%)}0k@#Y|ON|w%%Y(_-L+mbp)Ro4l9c6-g@%o$508<8!+q>>W zTg#cY%pyXc<+WMVl{fwpa|%>^xLp32H6Sp1Ktnvxq?}mjTPw3mWPLF~5OpPN`LORd zs7k$waC-J2pSDmo@zku?KXx*Qed%xG>p(m+0e)wTJdzJ#VflopI}i5|@3y8?E00$ZwaX8R3B(UBVOuI~quC z^}vK5kJkH?s}wzdH{Y$Yd%yiy?!`dK&zRJtHQhCa^ z1}pE8^F;00Hr1!Av~%%+qO$;s&Nxq7;)Q5bj%}4}^#i$0?AQa)?C1cL{68-jlx3p- z1O}2oC5LeC^JO9l?IL#(fv>n)!~Ct1j->CmybIX`+1h2CaP9fJJ&^}wRf~X0Jtp#4 zxWW;nGZ{ocks&8NGI&%JFsrXNx5o?5GA=9wM?>V zMn>bmwiXnhl3##}M&D_0_IR#0?bM|b7riX^kEn>dJLHtYFI zLpA4ru2cV=yV@xMvql$0k@E%s5$U%oIN}V*x#T?^i_MC~ZS)oeN4jA@-3}15@}>-} zz-uG`O|qlg`X)r1)==D_B>;X=eiTbunGD>Xs>w7&6Vd}PudX>b_}L)PY#m(tcb+$W$W)Otz2aImQ)Jkd zfLSbeB$e;}(nHCQE9iT(=h{N{uriT{1;DU6^7NHG)x<{|++^-3yq>G;TZk5I!X@uT ze48-${tHENGx*BpJjLY4{f|$J3~7|7D!LcHK0e*2f81o%#FN{@{FfN3krhfw~ZQ$WuDURgfZ5pr;Hd{Zr`>J8ze&c 
zP12#D6TLEu?$lTF#E*aHYW`3-bzrJye2|!#V$fLZ*AlL_zWX`RdMfFVNUUv!#QRtZ zYKO})Y9v`aOLp}o983U~wj_bt)LrX$^gG53>6z4~+F6<`fU>}3IA*j z{Z!YTX2gorYtxuORqK4^n(pB#q2mc*kk+|px6l4KPS8+nN7q94?sZh|>4V~348FqK zVCb0g3J|}bLVHhpb5m=dH~+7Z%WRt8nmhi;Z3Isot&Zb7on8WZ&mJcIbD>yWD644p z%+6Il!Fk5PG+jqjdY*~OaM)8&x)zxp(#od8&P*b>rsq1PK4RKJ$P&&)U9}8yCF9c< z%*R=pFZJ7q*KSw`k7AMb4L-CgSH2O_ewCaRWhzwN>eC9XpDOUwC8?#aH^w@(OI?u$ zr7n$+7G*pDAY=SzvoGn&EA#q3(U4&u1QiEgYYf{TI3Po;=;z_7pkw@Qa@`mWuvKRn zb83lIgK?Eeyhb|bEIa|cE}zc$pk8)}8vV!Ms5}h%YjD2tp#0!>Wi0n=0ZN-3=G2bp zEBkxZ1LdE%7AZhY=ODHrIC~(s8^}e`cV-7*)&(xpsb|zt5>H#sIR9YW=Y7<+on)7q z`hq^m8W>UrR4AyQhR^sB^Q6^+XoDg;^A{S$bUS~b^e6aJ<6l=@)x%1WsNQm(fw?IE zTVn5D`KRF*>t4v1Btnkd@9DgI=4@rS#NI9J4^l4} zvJdP||3V-I6ZJnxiBI#xbh3UT3L$>OXEF}zOL7hDyO}&881ls>dK}C800h{{OLS2? zpKDhQitOue-2M{l%KlJp(g8~p@H7)HINqLm7}&oD`<+pVr^p$lCg@nbz>0h&&t5cH z$a{divD01L>UJ-IC4ikvr#l5E`U>JqO>>=xdYSj>d{*f}w#w`vh+%i&YmGlR)rmyC zDl&-SiW|N_!i==s{*W6|k^#{pQ53;%Pj1O2H^w=3Wugez-QQlaHnt9{PRn@ucyLx* zRc9!ZQ&4|z1f}FN7~D1Kbq+CA=J;nn96SFI?>N@7Qw`!4;AP(?Q>+<&if~2UGJ7Ov z8BqUj`}rQma7r|>SfJNPjk?+sUzHHZBfjcG8xHx? 
z>n(qL&-145ZKxqwV2wRk!7X5cIE zK_;ES2ONvGgl^vy_YUJm+UMYCuRbTG)E&>m3-;hQJFNBEt5CldH2UEiTX)>RNv;fIPLkzWYJtb)#iVQaYnT(eQIZ zG62!j@*WA#cc#YXnG(+6&Co>KXZZ+0^9_P!^BRu>pZ0EoCuy5F9-c>9JUmAOkP6Ng zWXD_=Y5^>kzP3=nz(@`OKxdC7ZSP2yI_U?RKnh<HFkBHQqDED#-*|JKh4k^zbL6G1}BUq`p17)6Za{ zG1@f93^yJHOFzo**@nRmiZP8>0Ysj;IH@JZe!WnWBbgX!EuO+wH)m_l9v|437C3~V zdM3hX%ZqO+sXHiVK+Vb^ya+b@{mNy@PYdNq08dQrCx(3ueRiWVLm5(+41oin9QST2 zTU&IUg0uLQBdFK~oPt_&_K^3CFSF@7&3CA5SUs(0%Xo zS)EDqB1@SzN2Xy9Q-?&CR`P)Hgok~ax3&8ym>>0#ZSwDk=B0Rl`#?GPXvXLF`Fogd zH-^$X&q8%;QxY@=q-A3c@<1~}M(-E0fKHn_ufGMk1oPnq2S)YZVS5eHOjq-7q%G?2kp6Ln1(9Im0(JzTidtqO^C~!c z!^Hb@FBa`%>+y9woV5%5HzyNux!Ps#6_ZY~)e{SktCLe~zCJ?u@XFh0%2Htg=Of(A zmD3GR*DH?dILvVua3#zqxsob`Kj2uai7c4JaEzx&WcgVUd5Qoe)owP_M{_@CVK1Mp zijx@yVCKcf74wdT9*io}uBwUFMLqEHe|wUBQ!BvSoJgwlUVMs;6E}iDs`Ac!8Y|NC zZQ14mJ>{|vwq}3UoA(O1aC+K|EI_4EIkz5@9CIAI!@67nITc!kwsh%keQ3L{NDn&6 zSDyFK2rT*ATt;Bek5JG$)~WqLAAD`f?h#w-?KM}FpW1LUqQHHE(9!|bBwKy#&AfEm zb=;>{51D>7J~>~GIyL)`ZSdKLTF>hOsh_>#@(OP~OAIJ6nnOX_y^)bLszYo&ewKK` z5Nn{NWzERsh}we#id^hY&RVa@QX9RZ`y}FaNDXa*@ZZq2FvMps&L%SFrJ~`|DsJr< z9_zGU?~CeVGD@ZCnGxZQyO(n3Ft zg*T^gd!l|N)wwfgo~S0l@t3GAcryZb?hN!u?)gJ>t{*?LBd%kT$yIal(x>SVOXaI? 
znh>}O#`jc|%QT9SFGS#FRmJelie+qJ`$Y(kUByfI4iH2`z@0WeWA{q1g$#S=P9(m@n5yn-PIUKCTo zoj%KLW~sMEf(Npi;B5bhV1-@PC9o$@mVg1zw5WB5VKh%)@87201hWMYZR`XqV)WG% zY@}~<<5r=q9ugJIfIT>n_$VTiU)!Ad7{?pwI!;<}y-)t)Jcw^Qudzl?hWVme;6mXr8fP zsQ*4n;^81ul`;x@8I8(3vv<70OtKB)6-zR~sxd+c33_Hq^xPK^|Fn?-d6B7UrmA^o z|8ZZX-(WBzXpk-y>x@qr;3*K`W|=Bg?0zot-BxO0-e!?}lH-aLi*pVimj| z6nW*3S-Zc=>&nnmCw6{34$IpW-zO`aEk`OiP8}I_fK#cqXNbb|@4A{sCVQ5~W8C?UNIgDVQn<UrVSO|6eylyE1)VoJge z`fa$M{GR+}Y!puZb^0Q%Bnsy$rAm_|6VJVnzP7GX`irnr3HBQ;82&EGLl*k`jE3&0 zc;FuEWDdaiC`dhSo1Bx!)_L)a1WGT9+zQW#z?+dWu>eq*FYR(1Llfs=zWJp8v`@WeCgVK(^FDu;N8PsJ*# z`3H00MFS_wq_Q6gnc5Ve{@69xC2rC1ws9UXgZ1SHviT>fiJGJRzL72-GT}tN>VBGv z4BNFRY6A5?sfW^;I;2GZ0C8kUI29f1Yd9CCcE%uf+vxQE%c$&mOFxd%k~G9u=NxO9 zd*#Fb&T@&#E)-V9$rFuimKzV~cM^K*-l`$|=&K}l6&B}UU{H(*J4Q51cplCNxOkC4 z16(;?l26k36YlyfwylVGc;DUiE|}EF!OScm;02NRijXLU;8#IJ9%}^l1l@Qw-d$;9 z@-W?jL27upspfq1E4Aw{MLi@4vFQfrO`ZXE27*e?L=Oy6YwNphMP-fgw&e7;I|T1R zWZMK<9LFMAXT3~A!&^10*`#-X_7scOe%lZ$3be*`PSiRY+g6KDCAvrQ{&d=hMsMCK z{00u>BX0zVg8T6lG|dRv43&VKMcbuJv>;c$2d`|rk^Y%?ugJW&z^>im$3&cteJ0Vl ztf#i8^u5M;kI}A6ExCK`jFTU~hG_GWmdnzQG$g@AW@`wfOI{g4;JUz3y<6=!R< zjjAj>KbKrQ79dD~IC2=#_>=A4S)724WTTN^DU7GNxc&?TbT%NlByH}_z2-|6Q^=1k zxt9vmg*^)O4lV9EW*fk-fRJ8ke7raQ4AS8qOvk|lqYb=I-$-Nh@a$5P^ zt}kj2AoUR2C9jIPR3toLt~?dfx$VQ1AYKlTKT-o0dc`G+FwgEB%L^1?1mbb5$ZH!D zgk(RCS&y&psxRU#Wflm$hlebv=rgLb*&yG%f@Ahj(*;T5lx-P}7*p7BV7usy?xMey z85}1IExIyQ-)Lx{qzoa;odw9xpQ*o}k6Xv$whkdI4w~S_&BwtSdDZ&(`CDaoz0NeJ?Tny$T zndk9i^f`rQ5#y|(wV5W@XD1!nZt_yIS7_3D7$>QC;fgnf*Qz510<#ymnL&nStjBoEf!+##2Tc2Zm?`fV=7OoJ5ZQi0IA@N zyYPf5<$r-et7T9!1p?;cfP%otEECRR)M0yZUS+eIs_IVFt$Uw`=_%1YC1AvADT+T7U;Ud>ov=_>YJi$zvoMdaR8nKC#~elV2x z?oQx4W8$E&kG6ooUa)y`3&I*d2|SBD5GRpN=G)(sw{q`gM+d#(V+@ki5pU$Pr&d-B z(KaKY&o6#3L4nv*<1g(XkfjWN*P$E`ZW}0wU%{BO0nj(-zKbIG#QkpRxHNqn z9FLB5-BI;Y>VNK%n!lh!bTpQR(>c8)j`Ft7^A#sdvN>rE-Wqonjr6cq~Fc6q-KOaOfG#-(`f*?)h8+qKU&Qq?WzqF z%jB${=D%A~E_@3wByaa%6^0*SUwT6|@pABG=Rl`$XKAPqv+JYJ@pCWm2MH+rTt(){O&cbaoYC1^L^**PF56pLn|sC?Dg7Do-meEk{cEL#?#4kLlM9?GSXex 
z-fvLvA{+e`{NA0S@Sr&VX;Ml98C-f3O)SbXF3 zH30&rlV+5zSyD*DQzf|4oK;j$)2Sz*2Wp#TNTu)pM*l=)*V~~@lJ%*4h>nRD ziy`}TQ#uFZ98uyicN89dLw59Jt7I8kIv^wPP4O!sOYEAoIVKNi8~oq!OJ_72&q`3ywP6wlIeTz!#A7M$~`Jn;cN8N6shda=auNHaaMxtgx!Nv*s~Jf zi(D|Y+h5x3F-X@bg?u>tk5^R7<01&P378?RYi5h@Q($0{7-W{w0x7~depf8WK7Uz< zzeNc~a8VdIyX^c|Q>NINE{ZR5j&!$}x{`%j;#ZbBT9>J5;2~Jc$J@o?!y!H!;iu!z@0@CUR|>&IDIHj7B+(@00Qxy4Vq~F z5ob`>6bQFy01SikL3K3LO!yY0rZCYJzg2Nnf>)pFKNgqh#Hn}i3I@TXLEwZFWSvE^ z<1@saA39ZFR3Pxrbt^~}o)K{$(FcM-d~JO5u{VNgR1F05!n&#fQdN(7o#RSD$MNq= z7s_dIG>PuHSn0bC|GSVp=i5n|i1A)9HGT1y9wyW*}vmyp>;+>$+NegQw;9z!QZb-@= zItj6Q@_%n?4;RsD2?l8nYa!rduHTg$p~ zQWyIZuH|sfRrUP;9)klPg9`&c|G$M;U4N%MVCcCK8(;qSgM-v$9x#9$kasO&lrH-( z%P{kfmi%u==(en^+`t_P)BC?2@x`Cd+|Nux5Y+#7*#-i#TRyD|R74Iq=jc$M|4&Z? z0qM;`0#|ZD&L)q+KmXkfZVJ#7`x2tzHJs~a75SH{ze_USpS>c0kaTfK*nCmI1ApMk LTCj?TRr diff --git a/media/images/gemm-structural-components.png b/media/images/gemm-structural-components.png new file mode 100644 index 0000000000000000000000000000000000000000..3cfb3f1e4e61a1efc57e6282ee2eab5e90f21fa4 GIT binary patch literal 245863 zcmdqJXH-*L`!5O>*r3Q(LDhWi`e;*+&!X#?rSLUgLyw)8~n;o}Ok; zdw%^=-pCnpj$n%Or%s)^`ur0c8(Tf+BiX-CrL)|4i8<=2jzg%Ckv|a{ovfoYitx7J zA6@P4>hAV)Zs8ri2|oP49_n3(YlZ*y3V5QP_m}X=f4#W**Gm}jzh3h=)r9!hOGFYU zX3hWo8vQc|{(rv~dZ&u!U#|toU7tAnuh%{^{tYFZ)&G3Vod19M>oxKYKlP{l@eu*X zA0i?cr~MA{UDn35`jN=Bho39=_V*P&K6r0BRpA2Oe)SP!di!y(&GwpuoLWEkTV!8DKGi&SUerro++gzwLlU9yq`06T`_W%tF zr5hd<#W*|LxbvF-9G&6^$GJ>JxL>7G9ACy)RI{$As2`u}%HKHC{mYp4S;mVckH*P%JSw?MdZ^s~Dm2I88aCT1ZybsPjlEX>R|Bq;Z>9Jd}V5$IjW%w zdGH`)!v9#rb0vHBQRTX*Sh#TG4J#{l?as&&vraw9Ec(mRqj3Ki1NTX8KKm&PaH6zq zxWw(l84vSXnRS9FuCl1WNEBY3i+SAhj6d{k5KO0Ff=MF_8b&F7&E&lZ15{Q1bxcu_ zI@!5%$gn!HEOuexJ41Q8EHOr2FI1|eRhE?G7hY!ZN zfl9Knu_oLS5(dwopJ0+SY_2_hyxvqXG2vp+rJ*>kXt<0^39IRQ3ii!@IT)df@ z+AX;_J3DrdNnAYZ^E65p*(%Gh`#h7n0_7aHEAi(uFG~Ji?euKC68Wi*Ev^o4iWMhZ zTVmL*^ZfH;Gp6VN@2}YZTN}#by1&1qEtO?C5vhKPAOwAe{Nj~mj0wFZLIb|2NAY_J 
z5wSM^*OrNkV=rvL<#Z+N2j)G#Jx%Ml)2ry_mg2M3&RXIyyX3S{pUA|?Iho{yZ;M5QiJAF+KeCjWgQI^%6kD~|o+NoJ-x?yhFm9iv=;A{CYZWmu zGU_DxY|&&Jh~6!z+q#Dqby#$4Z~q>FMIVs`7Ayw|L2b3oJKo+@<$ky**mx_z_g>(T@#xkCQ%p_`7yV-=#D$ zEbK``llStrD;_)Cd6H7%Ix#WvKPTb8h_#<*`b8UZohGsM2n+PI@fG_wV2PGn6d$QCar+Mn!`@ zPc$yGk&zwDCwT|8$uX#Gj&=+*SdW!iZSBp4!mtHTzXt!0?|!tSL&W?T6m)#zMq5`` zWJ1C=P0i{#cHqUWX^9Ua{Q8?r68uz{o2ZS8`zw6u4yd*8uY zLG#_N1CRBDN~fj67cX8cEW+%gS(APG>Hn+`2k9on9kK^iqvL-=cgzVL`i?@zE4pZ2 z2oLChaW3`MS(@2;DkY`lI=-VlYNp2xl3Un!R(uxS*Q_RM$|b)_^6~Q%mfa(`pD(*k zK>|t|(<`$zq;Ic~p%}%)!O_7Tm5#^b5p#=}8&@eBIT8zngFLatL#-`V>wa7z^94(q z)h1K^$N0)f4lP9?yD_FHPMwOZKg*)(@~nBvDaMIayT63J`3b8gCB47-94)k=U=quR z&pq8gpSI1!k4W+eK5@d?43XpNWV6I+23mN?D;Pf?-+Z$FQP^0y4eW2bM~}iWC}gW< zBD60p(A3PV9cJIaF7xVK)Eg>Ry$#VC*lkwSoa^4^6SBYl`q0)UFV~C|+ZY(oM5cZG zNJnX&m6g?*D)TDcdN3!mqdejqy#!mR6zMIKT0M334{`o?k*ZlJiAuY1?U^#9 z9j(Nvy z?2X+?_hgh!##m{O)cDJ|a`J+=K@ZN5dx#l68@|8ss&+Ad+f~rrVb~|CTR&H~cFw(u z-Z_g?w|2R15>7%`7j9}M9juxlrM%}hD@O4= zFx*#Eq6CE&ToZA)BZXULR8UhBHUZW7!lg^mkU-r79RtTe1CQCj%%2FI@VTG)_ZL=J z1BevU%{rqM4tCZ+j}GilxqH%kK-jV<)q5Ap=f2KGMowPw;72LFefz+)Hzk$NZS~cK z^XH$0HeXea2eUh$_%V*7_1=}EYLw@Oc0;@SJkh?x2dfxwk;L8E=2y&& zj51(*tUX*>-X1MBk<+d7C*pCEPr0YGKhir~~2NUh0XNnYsRbd%H4==raoGP+CDR>D4yh9Ybs}Q`h)OpcCbv9J3cWII(iiaB}WM7rI01p(OL{^OnO51j(oI7_;s3&X69Dum~3PbTY z+^B@rMBN+HOc*S&Bp|>*x7Mp1Q+h_npEv=oA`F(mRGUw4(bn-gd?D%QZDO zuMs*=2`E0L$pxMH5iV0+(^x5SJiY3YFX(0MZEYWxmo2>qL~%ZDt0NCD@fvaISJ;Me zXyuG({5i)HZwUzTJak70EIpc8^SyY+X2hAx-OYb-bhD&-1Nr zcnOpIplxxq)yZqNvpUM>_)V5Svr&6GMQoXpinD0r`DJpAiS|c8P5jHAL#I!0l^sGKY3_OX>)o~C*DFJqw}5i3dQE@OIJQY9BTyX z>im!WG!k_e=I7f$BQRYZDb6g^iboG*o8Z8f5;WO7OXx&k8|)^0?D3SBf1Ta7##1v< z{bFjD5iX{(gwA=qUN$phTsdj~=hGqcG3WxdL?Vy6|Jj`gO|Hg$gT;BNJ3Qq&h30-! 
zCXf2lr#tN_cV*00M<|(Z-fW_OB^=9V7JT-qP*%Zi(zA z?s)F^SA~l!>PgAgJ&2^G{ojO2vzfHKi!iL+iS=`o!kmT`=42Vs2 z5?gJI<~2|__Ev!S^HSC{xc$HP)gpDUlo>H`b%`6+8s({>A17#&Sq7aPOL`=L`F>T< zI?_`@dVih@HIxrCH8ma4NK9><1L_(gFUG7R(oupduk$qz75Jw7{-GK!40o33E?;H^ zAlCWN5A9t?Xb{5?f4iwOqLRlHSB2!WA7v=-jh82VVuTME?*|A>Z~hfsM|l7Nl+I3K0wIwG1D`H|mBouW%RmvKpEn?U`h8J>Grk_#6a zeTPj9mA?l?EO(>{K|s!-(npWSD(pD3v$JjE4hX7)6Ps3%`2U@Zab*ckITgz zkB{&R)k{ecppTnL7KnocALOD4lX^tH%R+IDk`94 zij*P}jeq~{okT`5p0`GFXn$vAPzi7WMo_VD{!@j5&;B>5*c5CHf`O`N_WlU|^hU6n zs_KWX*zbW8uEWNF)`x56q>m?jBfGdXf2vJ*nhTs%AH++T&2WLqdodm6i`~5r+kh=c zW+i!PSJ)pb-@E6+@X=!@(Pbb1?d|B0-vs|l*zt>osN;hT_yQ^`Ia+wyMP4*Gp*+2?4;)P zUgfnkSI?xh-g>on%2|e^63LYW4910OEZ?ZUM)KRWe7}iQ8PYgm`y}5BLCzRI8?EIi zo0yI@clYp_560-*2x@6GMBO2U#TTWuJb-P5N?Vn0{+7ZN`k+8v@w?Rle}tV3K) zyw!OawKcl8VcMM_N{}<+u`Wl4`$KNKjN9~gQBDi-1veHVGSJbvPNmv&Zt)~pp!Yv; z?lK0H`L{ewM06*t{oo=&eiyN$%}?`u&>5jKS?V#~r^R%prcj{Aa_HFEnORV)6LksF ziBO5axTr?5y)ICCdzm--JmZ7d25qfD>n^X7r(oZ9#ga6EYrpcaagouA36pqxqg}jo zxMOess@a5E))2#HFrJcW*8t*TDjiymxP z)>M^RfsK2d%X04n>qIR>Kb(1XHG+b!Fmi{k@WVN}@z{;R$_Cy~<<+U>r%s*LgEva< zP9}ybJa`Zk2j?7FV@kA#LPInQwRB0O#xzLIRvWIzFDc%EsGK&|+tnXeh=GkM#?aekIPcnI$Smcin=Iw#FU@g?R<`S!`*H)yy>D2P zWNn7+`zQx%=qE=5!dtyMSv!ou+9!xOrgbGrj$@ABcaOIRBU+u7>i$rF)tA5e9Q}jl zeCfl@7um5ZX{qu$A9SrA130mMftJ=f)I*3OG$=A8oMr7mq4Zq6YQ5_!^JRl^jl&ns zleB5ri#S$ub905vqwUT}e)LfI_Eo_TnhK>yX7Z+{RR%s<{OuLc*e8>V#!sJ1NNk$z z&-LVobBZo534F;r?vlv!*QL>AF%6lsdaV-!WL)KKj~cxi?D4U++|3(?Jj=YiOD3-rQ@^O*zGt9>HG$v{al5d`$YG2R&yRHWC0vt_v za%<~^)7NoDYk9Tix1BM(F|n~%ki+V8fP>BVNFNLM0;p#NAjH{gBE6V;ZD@$8S*DV9 z(8U}x3%z5`)RKIE-682HbPT6fu1@&XS|di{p^{W$bNpD5f-l+Izl3iXN~1Fw8u$$x zj&T-bVaD6een-sCcrhBmc*5ZY3XRV(tgXl%SWg=d z25nM>paL{DdiHDp!N_cPeR9$J_c0I=@IQERZr@)dOnZWx_exw+k`;$oX4Yj$XImTJ z?$_zMZMNlxeu<9@irU&CUs_sVm#8!()@B-CIPE!vyZZY;%*`_o@w?MQb4i!A2VJq) zVMvS$OeoIJ*f`}Z4WBFygEGN=E_TZE*ApCJ=918aq9+wNbn=a#_tN%o)6v|>xT&}h znCN_HinP~f7QG#%2tVD{-Y(c^j$g9E?DspAa<1l;ZEe3k&)8K3Rddnbn)2Rgw@CIi z&qUj7gwTnbw!EX+r%w5^=IR0=Jjo73%tOGuQC?uJA8`HO0r|4vsQ+jB*rpL}O7&I; 
zr&)VAbBSf2t<})#Jy937#-!kH^Gla)>yHpoF#C1x!JMc{Bhr)URXclv?_BpBN=}gk zwIA-6raLWg0O{cJ{7Y7{4?t;+0^L%lsafU={0e973SUHY^b>B@qoLiK!c!g}m6N3= zlU62OYQa^2O18|hSbn08+o<0)iW9Pnmz9%)jRTQ9jyz8`snl*#U*?7#3jCy_o%+v5+t;8%jIp6UsRyWdH4qra2EK4YdXW* z_KKmNmp{6JcQ9A0y>P@Q=Q+4igS!4k&4_LP$2@p@pL>Mf?YX}YV>aF3yR{Gl(L*06 zKFZN#Kgc3iinp-_y4Q}*o06B3)^|60y}0*<6(k8 zN;-AKQg53Ap|eGo^L}qC#%gijTk46uUAQ9vf?m4fck!s~;UY zmrCFAStaU-+GI`%b&cN%JHKzFG(3s|9c%6CQ`P? z$OG{D|CBQ~1?7TZmw6JnfjVFEfPb5%N#>L;LgPe)sP@OFrAyoI%ve}i)f(%UM4=LVG`R-1#1XKYUQ5x!Xw7)c7$sziea5AlX5R7v5eS}TV1~VRq5e5gF zk)GZrLn+B5>AfRBY>|IdYmT7RP^kkaXD45^3)YMvlqJ(s*P_{-r*1WxL(U2_D5=%; zy7F^zDbosCo8qSZkL!IfeRAe_r$E?cZapTzvOWMsbzGY4cd42&YH-ch2hT1tQV{n^ zWSKqYo|K6vnLbk!D$rw{RN7@3FaWb8+5{Xg5{Msh4J3DA!uHUn=Zc*CIt2#RvLvus zLxFkRdhJHMacO@M8sJK{Gncq-`BEU)$1Bye^BzCJHORi0s3};1hu^$@{SH`YQSkhJ zyJ7H_wiOm~{R=a_sWSW1J&EEv(L9FM|FFRIUygr(hQsE}-8j$@nOIq24J4;z?%h+q zrH&fzoK=IMaQ=YP>kjxpEZ~{&lj}J)+v#LP1@BNrlh8 zlhyFLSoV0?djmdoy?sD>xQ<@*~S>mE)KjW$42eRf7I`fFTmysi6shv5%D zK8X9gQD%iq*RO=`TwF3WH}5#ZTX&to57W@~7}ua#sC~aA&tFxfNLaN%5*~v=BOzex zK|_;heWl9vJPA;3SKMv&L+yHv5}_F-uI__+I0Jy6L>w750NF z&;ZKeH0e65V2nDuzlR>qmlGfNBK7pzxFN{0mb}0l(c;g}d)9Nfj#30D!K>Nby{*Ta zv(4dQVK@5v`gT4!V?5VKia&B0)jb9+_-WF#iPdY*<{*kKq?&X)=%9$l%F6Euu)vlT zJzSXVFD=NlhH)mnNNSjgmCYI%89Aha&3Jc2bEBi}IDhVgfQWxfgkZB1tv{NWHTQj+ zcMz`kz8V;NELFP*;psL&1)({Da|0+7#j5>zPp_)|$9nBLxv?=khGQ>nkN1}tXl_4! z2~KE>gcRUnT)3^uBBUo@>KxOZOJE%-fNEP9Bu&q_9M&(d$*+6Xpl|1=y zsX~938cR+heSBCd`sQSV-0%UPSw}4XZ`J;v4V+Ev-!kj9tI#3qOcV;RKOzyjXME0! 
zT-6KFwZU8ydBjo_A1#qYiyCT@X3yinhOUY$7?kP-7*iIW zP2&Ij#S4TE-W$|4!_fxcTF=!;sStK{c5_MbQR7_kW`q}{pn8XvjCX^Ts)5#uaMXd6 zpi*sQ<)&4VBnbWuH*P$!Kl6f8*+IQI6?bh}ET6i}6Mjl9bDY=Lf4bJ)-fE;MI>GS) zwGB;AMb@SF1DgK1r}f5rxoqk?isdHdAcY!_PUNk52FN}z*F)R((V29%ZAPRs)zjXo zco}pFF_-7RA$|O%EmOt2t6fa*&C&Go8(P4?s>y0EU!DxHZmVIVZr#wVnO48XU={fC zWh@>JaL~{%r!AlsxZ{eqfN~tnljn0|!l2&E)thN!KlJTe zeqvyHoy#$^XkH-j7ZMe zvSlct-Nx~)qb|}<4t6}^Bs||5c-y%G@r`!qQ|-xazr~G2A-fO^dbgl%XUqmbTx7lt?Zh`l)Sw94Ey96J~qE9HJfX3$=2p>)1!ah6)TQ9q`cM^2|wy6 zs~Bz^LaU{W`)4UxTZdphvh+Js;0eyEudy}8Y7V7z;^!V8m?tYPbsOd_IF?8f(sbbA z4z9_+b@x(jD0K8uv)TRK8$3kQQ4v&Z5MMzV;H1oXROwD1S-VQ+cOBx;g-U_s)XI;c zKu&UMp|Obln=TH#1HB>Hs<#x(GLhFfh=5Uoy>A}n88(`Q8`qjGM)3z?;cN082FEtq13EKx6Y#m zvP+c{MEZV*#oRfouAKuH#X%9%Y)AY4fpey)>$w7wX#w>;g1*#X)S{Pr8{&`00DG>jF zQVt(s^dzn(7<{ZO6F#5gUz9eK1G*DCP$h5U0|JPwh6@q}?58F$f^F$K*T!ny z3*#0AD1?^u#wq=!b;?HLsJLdXQ6%Z)>uK=->DpTsIP8j2dow4qbqbSEHiO%0Qv0%C z*NG#Chfh&RX_!&{Sl!@18=yB4{yh^_c5f1B&P*&UE#OlstdN<997zQ^T|jyh@+F$O zd$J3N?|e~SQ6%~VLy$wNmZNrKq*w-EGbvG^bfS{aC1ydT4^JH4w-KhY+c|gl?%hYF z=7>Y8#~3uQfjM`lybDpe;O#oVR#`PNIwlC^z_%G^^ekldq}KUO1_QEwk&ThR+xe!!rIdoaX%B$5wPp_4VhNeyGTM?pam1wKq4 zv<($$Nl8N^kJ;1vYn9W=$C@aga;NP+eL9b;zI45rF4cX(Gllg#@hHwjH?Tc_h`H0{ z6Ls6&;+chy9z8PcNs@H?{lbeRX$J*C|X@S6l>E{4~fozWR;*<68sLICWxg2~4K4d%aYE+J= zmwn3j_id{7kH@d?(dQrKcd9IOM5#cUyXnMSKI9hEbYNUX^#QX2xZ{?zd?E4j=`)ww zfbXF4$DXK4hOCDrpCx8{2%H&~|8~Ptp2e(B*0uwwnTbuF)AQdNzsew=N9ex-CV)U+ zsrgR=J@Dzx>NJCe+UbC%K8LhqNI(}%SPh4vF>ZQG;0ZpPjif-C_uko<<^>SrHnr$1 z9j2%mV8;x@T68)TDddFhZ3nS6>-XcrA_{&(8c8m~uzz6UJ}_rK zk{v)tOTOBAE1k%n@Y$ScJX{dr;ZeoyZt(U<9poi=uUDIo;y}7aSsfS5S#YcJZu{(5 z1<X`HAt%yz<~ z4d>kxMowL}RecazirPO76J`iklN#mFff%X)dwN)5_b9&xJG8wbb>vZS_R6gmO+Pr; zaBEhr$hA#^s+IOOUVprEaj4{bq@bQl=ZkeEyJ^42Euqu4cA+_nFd@ObzB(%Y36Kg_ z8_E;C4)G~A^umZ}@KoJ__)i>&8$LBb&NovYf1nj?S{Twt>ocw&*H>2`0Uua@VXHIw zgKDB!=a)BBYwSM@i-YjLoP8UNjIU0W=WNsHV)(SKt`z|o()fXpFjsZ8ZssF%?Pz%2 z{^IzfOyzXVTy0%3*BTi@B?FDoognV9r2Ms!{))~=bYeAUedQva4ixExcUSgv3_QN? 
z14Z&_JqcaUktwKl~WvIhB|WxB?(NggA91k0CC7 zaYKyk-m$7(&2-ve0_YpANHs-8K2{9_(3TfvgZG+?o&12QBh*et>u_YTWHefAR>);o zGjK@H%HZsK_5u|ZVyN=93q6Mh3OFhR|_7 zeE86*le;o@NppG$Rf6r{ES!fMS&2x9h{Q%X`T*CV2_e_N#zXk~*ZHFiDFEq|&&0Hk z&ZhKPmib%V)#fxSzk@6(q@jq zNOeG4Y?_)*PW4Z7z_y~^g-u;N(tg)R9 z`ABuRyy)@aHexOfzCCi_A*RA$k#7!crj6)UgP8jis%W9w^fTn-9hB@g#-5;hvW%>z z-Cc6-7G*ugDssl|F84LRq>m!G?wW|?=>2)lKof&MtOj9>PaqWr{vbXP@U+juqV|yG zgDF{}TLLo~cW>nvEm`n@P=CfT9e|F>b&Pkvd@qbz*0dF?8KH68p$mVmIPTSbE^cGR>bS?NpP|Tj@P$ZF42(vN-M@(l*a@GnLh65xv zz{nw7J69A;!YHVmeZ!O$o}iMRGZ(nY}pzvPDQ0=tJXEG#Sp0crlE z=c)6el~#Sw*DarR8dxsd;sWydp~Ju)vpN2f-RHS{fGjF1iYO5AEpaiTjUQUeuH_JR zUl~vUzP-UWPT^z@ioimndOcd;bVuu~p-ZUg=(q{Cxn}_okXZrwjsh{3FGuViCpm;Q zs`uqcxI)1OV(7JHOA15*4lCO8%?j?TVVKu-qI6)Du^zvib3rk422|p|uz{Oib zFJHXKMkyuoxLX>YIyekunZkmNS)0SWrTYE-CB^Lg(l7S-rEl=rQVnZAxIa&ZJD%;b zkQl^ib!F=Q{pOE)YdSFtCY*02Go{kaEVz+mj>LR;kGs~w-;^hRky z7Lx2~5V&7vVP{XK3Tz&z+`_(u@Sf+vTBS?QJd_gD3d-_>}J(WOP;rQ5*m%o#R7oQ~9Q0!IB6FYQB630dgX8 z_|6JI4p5GbOrQ?mHk2x>Db*GwQI&;|0y`oZ^4>6}n6sej9*c6UF;1}=<8>hr>L2On zyNwVw>rYq6EG)BjP*m*G3j7@$PpSGbnGzsVR>ScA!S-aPJ;U*|nv#<6T`szw?wuI- zDH>LjmSwZ}7hk8PbqZ<=-oLxLo9XL)d6>EDSUUMR?chCMjRGt$qhzz5n{3|KEC&Q` zv~DK3wDUEIW;r`W(Gdoz)?aB4;m_7~qy4gxN2=~6 z5P1aN+SI}BhJDT^hoLzLJBE^{tnhttqiF3ny9cb6PoLNb62?G*;W)L#e za(*hO?sFI+rwT;sOm>QE)&W?s;rKzQ(1#Ez+*J=pm89HoznuR?-0#o>2nDGqv{WB} zmo_btL5Z$h?CkBBXAv2lW97p}oFJZ9jItToe}0DN11W>gEqKiyNQDxB-xjE$AXJhR z1H!ZDp?rSd-{q=~`bVTHp(23DA4~_N^;t^wH#OY@p^Gk@)Z=^io}2X~)%U}X|5i|z z;L1=8lpifQzqQ%Ct1#EHq+p!#*yJvNJJdj@*}A<=E|KZa$2}6yYOggb1vD@al?ZJE zA#N3j%yS(>*C;E+W(2K~U2h8UpxgIkmwkecc8Zh~*5Eg|X;lfGREBUq>q(ZL9y6D+ z#dT17kf;ViLpd}*8LKup5c0dqNfM))9@={C`@pS^0bMSxfyW;d``S97bMFtLexi4Y zMh5@GCn353*0x?uAtLgiuXmau#M?oeD3J@klzas=HL&GWfmW;D5lI%)CQti*(U-f+ z+HCC-%lY#efW2c0-hn>8-+WSP?p+Q+lagQPSV(t4soQYf(<`r$2SHmXR!O(=Or@Xd z$-dOZ1EkgY^RpK&gzJP|5HX}(9Iv!b&Th)1R@DNr*6b2=PDAdX`c+}5Ev-Y1K!D!I z2NZrqt2WT0XFv^+-BuB_M 
zgVhCiX^Y8u=NTAy@dvwLs7V;sps1#{BqRIdlB%aibLrkBL|>~ClQL)z**ofnppFF*FBkXul^bLRt@;=-bNoF7n_CtNh#$tWp>@pYNVJcjNK@lk-z zIWoYZ5+5W)>DOD-Mw6BVgYjWbovAdMD&-Fttj`)L@}ObqScF!kdrh2>6T@ zLZofP4@*%evy^0d3JSr|#aJYHwqT-?W*i9<2%4IaD4U}FQH$iLAD*(`Oa?;>h?pWU zKQI17qwC>`^^De*EEO1wOwa_cKYl@M)Uc34&bi>t;bwCs0Rpu>KaJD9IwS6)pRZHI z#Zw2S6^V!fdYtRA+aU02t6TiE`b&%T*IpvREDe|%>&R5m?{e`!zGgL67Iv0iLVhb) zqUJ=aS3uCrZHXR`{bT{QLy`AQP;g?ja5~AouPHa3X|z9jt&Wl>rPW@XZ{70idiwD`Z&wG*IGb zZ!XiH$&fMtnc4}{E|H#l$FB^@b+9yB0x+N#qXkm#%76tWNTdk_6jJ*OZ2poYdNPSE z4#W>^Xc2wU?sdwJb(b72{U}Q!veS70d47YAj_QUdBv>}|Ah0oX;zwbA_@K6DCyR$- z;V*-NI`+QXcPzwImf-Z>>-wuwT3U(w`F-sZ*?AF>N@^IKGl9IevPA8nD zaI?O0aZ^iAcQ?VgJM0L5FwXdtBjDPofV>!p4C+=pW%!=?tXp%fhNH)_{JT-TmjW;- zRUoaPcW{7`t94{Ym<6buaC&PyQq+iXrI`evd3Ch3_1AtOBH9!F5pHqN6M%mJi8?Ye zcBj2tK-Y3tQtOFMPZ!w6r^;mW`RwWQIn8^AM@O5yXC5Q2Y8WbG%1s8{hX$~v>ceDF zACbZMXxl=NB#T`t`|{-?$aa=g-XaQk9?7YrN(j$ogMB_qiBwz!+4RziRK05HjW4CZ zfb0a{MuowFl|o<`bmNzahO>)czQKmUtZE=zqZLCKlc?XFifE4j#u!hjxcj<5=khV{ z`(N59(H;69bVY-@&0M!F|e;5TnQ8{GN;yJ>hehxq}>5>;~I} zzcZcl@5IPRCc>y1&F@L0NLz5y_qgy5Tx@J#aNRn3Mf(oD*i;74tn#Zs99gvxh4x9o z*US4H-i^EUB#nuALF=0)?YWK!#-k%qU~O?)4dw(6_LRjR8SI>&_O?Jqun-XiUcPdL zP0|bVp_RcO$aE6~Gi@Kpdn%ta&4A|x*56G>N5@RcstEedzbGleXz4A00gnKc<6-dG ze1_u87b)n>f%{M0u*u(%c>3ip8h-P4s9X#Nb4b4Mv*HjDO@kg0pw>^>{PgKl)6JPw z?W<4ANqtf#c`2p#(t#`1dc;Xg6tIh|H1)LabaKX`xt7erSqqB! 
zX~$MNfZB=jsXHPfBG%6TI+OrjfagB5qF$N2I>+3$KA8*VEgy75j&5MNIjrGKEYRoy zO$uSqSM0N97eKU6-q!Zr;F%&EF&I1+^W3Nh5M3H>0C;`8&)%&5qvEG!g5Strn?HVk zi(lexP@FJyf(=-L#d2FGENJ5gR*DKZ5{M+^s+|^0x8~cOfH4khD9D{_4UHKswSY;f zPY6u-v@^%ZgLo)5_E`8Xbn|jLRk7cWs_qdP# zbX|=0Y#mtZDuJU~xr<=P`?CH=+Nx8V_9Z3fl#GpEV-4BS-eDjds0+gR3$w41D_S1q zWu`7%%RQ}|Z`#pL7P--IU?ApFW%$R~{TyK#I7|quqWUvb0#zs?d;!lbdHR){FcY$> zzt9I}rub~e3lf#_wn+leDOo<`+M6u4>(_OL3XHfV?X5Y1h8sDQuRpF49Mo`RCAcC- z?-aO=JysAK#+w^_MAQ&kO}G;BR_@+5+fptBJ$3TAD5xXvga0-y3i#4-be zanFjk5+`=cbwv|QMkQmt*TZYq`-h1qUvYv7Ilm#B73luXcw&F;?kGS7o^%Mr0+VNcs(9C^RKc*BmpRMrTLCkAh>sdukNaR|J(=YcD(b~s+o6WR4i?Y?o(DF#Ua3>yJi!#U6#_=7RH#ryHje4Sr*Pvpptx3w>(uFefG%~ zDl1|kBj!OHu(cTpF(%1#9@6-ULbYV6#Vfh=XE?11mTn%?OPJjI8kriG>H*sC?;W;5 z4mCOklB85MdgeSdsIZR`%Uiq3SE@{TrB}{+uwO~;psC`LLoxtz=<-8v5W6~IlbFp6 z0K}pq6$eL79`<28UH~^A;3o}`IzDvvCJ&AXiYX^AC1Gm@9Fq7U z#okaM1Ol{F`{^9x8VRcZ9Ae38M#yonP1J!zh0U~mK=z(NwUe1M!#O%J(<7-Y2Jz2i z1rd%Ik&X{HWV#xnwY`TH9&<`1137ko`~ij3@$k-l2q^}SH+08Fk!;G{s4ttU8uyE<7rZ7KP(#2oR&Mm+9@-I%O`sc8TWC%N}ddS`#etBfbj zw?vl`{<87^f#$}to%#H1Cvpx|GDj}#zCO`3=I=X2tI`d7eClUi zJ_YLgN9=@Vj1q6Y)%%*=c7S&)j~0Ewg&Cg*0&B;p1^RYDn(p=R2qb92LW+p^+5Y-k z`cAjwjlo?O|HJQDWtO)ie~1_}z(BtL!om3=sHub&aiF1)pq(gdVykEF+}W-#O&N>7buH-85pQv zh{aYJ+NzJ}}NzTJIaJK%~Wu)h!JhU%i zZ^3Ov_ILpL2`0>A$yANPz;@srgZsk?V=FJGtGGlJZ&J{6NZv}f&lLjA2#()@k=?O` z$eh<}SL0ptH)SI^R$&mQSV5V6PCR!q&A7l!yEzz#2l*+)!U25m+nLhoY@|yqq`_xj z;~dw@AtA&C#@l|3pSeGL1q;o%(F$xH*mPlM?o`aMkvF1FND4cku_)vvjdeSAy?ILA z03)bcNnG=8#i^k4M{)`Pjk`zGOfI$Asf)mPobVaJfm~+XN!*PaH_WSbYT#KaOQai_ z`Fy`$s!S+04kWXSU{I}^?Qdw%1_;onjDRuW zUo%2Po7}W|1a$04HJo<;%(g4&g9M+mZPI<(Zkl}a9PPJ$4j1?lF1PM`%T;q_HG|P= z=d1+Zol%xeA2ApF=2wSpFprx7SlZ4-3#&Vin>B$-`g8C}RaFQ^%l53ZYuY$oyGzLmupxlUY_(x|Cngw_i{Ng7fBI z2(+8pM_^PMn4u3S-mG}vRNZ-2*-Lp;)>LXEo;SY0n&j~#Kk0l{CCeW$7W4Wc6C1W_ z7yZ0h4+`Prbtx)|2&q=ul3u)Ok_l0kV&x65@W^%Xgzko-q?cbU?hz3|WU8ty|Az9A zzl6=!0%F2OI5&i)LWu6x!Gg*Nvj^Y?3%5m#8Z6$}W6 z=wb=3=9-PecMv*a$=eV}gck{4!X^_=&zH0c&Wt^ItdSUMkFq}V&FMAV9iduu< 
zf8Z2DFXtkV5fy>ND&u_$K-)Y2r!)4J1OqS|gNCo>$spQO121f8w`#zRV%GGDr|wEC z4+eL+PdL6B00iVz5y(W^l==wU40Sa`9V|Z~r4WVy^IUn~r9hyjDBINhBF{^jnPg6Q zzAmRbhV7^Wk?gbeCVL`poRl@UZ)J}9@@0$XMXC(F!zqbD#fDL^L@B29?IH3`ATE^4 zC4AiY?uA)?5^b3Oe0AP;xb^q&-vY6Zu}4O*xhCsLBiXs~Y5l)a>m&?G5D{(4g2F`D zm>}8MkXzM^Al!oiJlKi_u4F#p%&_lrTc2RB_udtHB9`$)ShC6uK)`sZnQVj3jlJ)4 zDt`|PW*QJ1;El^b2>AX5TPa0wXP+!q#e1vtxpejkg@1SYf&B^bB=h zw5bXvCPSSKqWltV3&0%t-Ad8KOlsvxM|ioF$m1A8{0hd`U6MytI;LQ;pmp5UAIo$t zYiXmVr{6QV#@~WDXAcXssW45KkjzcFqj6M5q0t3by@a%j>bz84mc9P7J?)dX{mRA z1IU%a7Kt1bSp%OQ%1D7(CjuF!(V)GFG^U$Tz{qqHR6PDHL!iu=f@CI3Sz+0?K%={9 zU~VcCVteSs%v4qunXNu(J$Pm`N6|sP>S%UH`E9893rkh)=Tgz)8hIvBX<|Po+80pu!C`q zeT8yhGb<P_I%AGRMH%$$Qi1gtG!m1 zizZS2VZd5WB-X!$R5Ue?> zH#`9BRfFj3T3DT3A_>f}uoFb__{_VmfD!LksFPFoc5jz+)byLxgGs}!maF!U3+<2L zHTOX8*n%{>R{MYddO0-5GznOxz^Xr$L`<~FR}irW1`weK|BCTn$ol&=DT zhg`XR58O|~2B$||;=VD(vUiH8K^~HY2Bu_>#P&O~AdbmP)@I}2ehZ-fdrk82=vQ=s z;B<=x3#f$a8{tWf{1wB&YBI50}Ql-05JzZBNcMsN@VqO@;L}z zAbdL!(Vxq0?_zzzE*P*2A@{Jvis!$~DqH!eU6;B-ArYqi#3c|r0EW{RKq$NiE>TZ% z@F3(GyYQq-1d7wJ!mv}ADDo5`?8E9s(7P~#On~Q(9ol6;l?bW_;4mM-=~aYGgPA1` zz}Xv6Xg^z^Glk^r_KuL^~nz1s7i5hm83TIXV*wbE6>xveEDuaZ>Po4C7L=es(ri_&6 z^(YI#*qYGVH6i5i8VZcLnt@4<%tA0NVFt>&X2%O1Bt8I?yG|O5geRe|H9x60CX5Uh zJ&{2UFZcsVW(%C+rCpu8$2SP~$UxODHw38yOW?;tP47vK$D3jBDA%Eo=mv_Lf!(t6kLLqy?5yS_MT{@>RNq$Kyk|o>qzvu zz0b}9(8L9>4fuQ~AtT^k1t4Gm+t+dHRbK*$2<1SZ&oI=H3?P(Sh2K)1921`;kq3B3 z<$v+^)^Sno@4omTilQPaAQFlqNQ06BQYz9RDQzGjDV+ltC@9@6Ev+<2ib^9e!q7tw zDWLRF_gT2V=bZ03_uk+A>ofM?&VXYhc7D3H*3KChWA$n z{MS1F(%QJ3{7cf`jMcjT^Yn+7j*duYoTrdPTW%EI8a8%Ckm0QosYF6F0Z;X6DChl; zb*qkBR!~?YS`$D;nDTxOA%&3`>ix2VX!f8Eb_0wHVF+Z;UIry}PrQB#l~shVD2UHE z+}9Vbd)8^sxT%7ru>y+ig)`TCl9oVpg%~(y=l9m3!)xzh49_P&dWt?6IeeD@1G3j= zVXBC`_@b}y$=hLKRZ2vwu-$?sWy#;TJ`oZf$AnJTC^}@I*_;N-8 zG}KD3MCveK8yj2cY^olzUuGx6YG$cPLmrJHj>@_QT%q8N9yQcGf^+;Md;fkkQuoF) 
zrStDx2dd2eY=i>vAEYjaYOeD0Yj9Vus;PJB%vSyeAufc;?E={pXERiO53D2!JGSnO^Xq2mh1np1fZE`uNK$VP$2rl&uG_0%@9sQpi=&&l0P6hgLg;d-4Q`(16!n{Iv^hy++1RcBwZejAZ^5NH=6R-xkEMKO{6?lFWpf%dQerJ6qf=Ax>02G|zV850HoM@If@mAEmytai(rd$#i zq}oHoVzU-?nNXo2XOVg`4Qh1P!Rt;#95>Wv*Qm;${`~Nepc`QUOM9!}i2R1Le!X&C zl=-rbio7Mfu(R_uoxuAZ2o$#rApxxg_=*)363lKUl;};!UIFiZeDuZFwgys@#>`+7 z+0`6osD_uZE#f?il1uEeR*(&kQH{PyM||`4Z8#k;f6XP{dNV$Qa9PdZ)>Sf+2=6a8 zS4T%ny=aHBAQVXAP7gS{4A5)cdHJK&hK~$53d|&p%)MRVMMRt&n4_poaWea-E=3wy zYFnOU0e?4;h|R0Y@dp`0v&77Pi$AnM(~pEl1icC8F)~C{p8!eND;i-#fM>dk-uW$q z!R~UAi6eqANUiZM$k8dv5Wa4W#2Kz`+E~F3{%63VpkHDOx9aE!upYidOQjP-@eV~P zCdO6@4=clsFjQgR-i!dBtn`1kKl5D>^RhK87*u7TGW}R)?1w>Xi zJ3rqH>IiP|CWS-L5+6x8Q>C&7h+A)gH%9_?6iD8aQy=b^=1{C$FAGo9&v=*%WgI9; zlg-4sfY`FhU^x`QrKcqVxjTqHkyK(&598VLmZzp~Z+1UJn+k#E+vhX(suCRzzjrCN z?C*|0{25SD417b?8-LGpN`K!dfp?>YfaD``;H_-_Rbi)v`xPKVhqp~bcUYDq3jrHf z*NA1v#?4-(fyRD1$Q;V?5@i%BaB69Z2i_jhY`Z+IUr&Kx*=xQA)k7qNc1;>JA7V9k zNRlt`PIgGBSpISXusB0rV3@;U#kIYx4JRoh;*c2RWcYeOnho!-pv*Wk|n zJk2@$0}+@r^YW@8#Z%BB)>Cdbe}^h?U&rx)aRpARb|i5HiN8VW;CSBMYbJ0!cY~;F zN7cty3hIf8ow--?IYs8*1OXwAcvii$6@C4&qS0+Zz>34gJqeRMKXe~1BfSR^9SU{) zwviD;_b@`i0t;BZc5Yq)!IO}Z7{iGmXeM*Z!inH^ikq9sj zWzY0dnYpZ@iUtN@Ps8sJ2<1@WLWlZRxaL`xG6PWr zer6W{wM{x=o*iQ5b9{ml^HJo{%ViluowiRg1}b$D6!>jRWk^%H zIr?H};{sCL!fA%=c#1|Uw438^c_yJeMg$xl2H@Y`fu;o?{T=GWj!(i6f%vKM1n4C} zF3t%QJj7%GPQvbDij+5_H+1`*zP>qAXg4K~!II0+3R!c4;a0#iJxT|A{mc7hezz={ zyq4T3PHjT+NC?%}5~Qn>h3aDg7!&{3TItnLyG&Ki=ks~y-N|8@0-1JfjX(H19!`Tw z52)_`Rby`?J4!4B^fmTJX!71=I*4^EtT4bKy1I6!omn9=`sok1Lq6SuGj8}fV$^86 zn!5TFuvPpl?FK*!th0KLYt(qSs?*=u5QWw1{?i?LN>n`Zx_jt4m7*m>ti-=AuSae; zPoBO+j4FTS`-K1W=<)WM`2b%l{RqR=94*<;2X(hDf+Cp z>5Yvc`*uzDVz;7;wNU)jW&JK1S)Lrg5wkx0FIwr?R#8GI5KYmSRrTR)>c#+HnpBGxR-X2c%2xnxq|m)a>2?+fwLiLhN3)yIzImqk zC@?4}N`}Ex0|Cs`Lui&Y26nn!pK(S8GT`Ae@pY{tF>an}7`C>V=s z-yi>?U`US3{EzsqNRoX1aH9-G8uza>1bs7EH6C2SWuDAgzyI&>F05qyKgmEc5Q&u9 z(dlaOM6LR;qk}^+5-lVS);m@tKLHS3S!m-H*jTMAPf4uzJ+_+&5Q=RLoG0x1YwI+T00mw!&?%cf_Yc&E{zos?V4e82&o{uZgF063m5Tk6f9lW6YS^NXy 
zytptc{+V)zSv46P9hd`Se=w|mO^mayY_8HiGD@?zv!X(C?SZ#zZH9IXW}^?aXY=JH zDSk~M2;n%C<)gT*RD8l5j2?o5i`RXf6AVa7=G}LKE~HN?y}bQp5~IS>s=yNv4AsPU z`XXWvD3mzT4mggOi#&fhj<%?Vb|`_$_DbjTF1&u2F&Dx^DY_1k#haTi{?s3xn1Z06 zD0u$;>Z4?k@|9@t*(s*30DWKQ?93B)KnJPKdvjyN)Le8)@9+eWEv4HKt*#l7pYsG~ zY=&D5x^6t%;d^?_z4+p}5jd8^7_R-Cas*GdTB)T@5f{dGmhZ>S$EO^g*8Oy(Gq857 zB~O$|VgR!(TqHu`Jym%YC~im)p*wqae3QsC!5M00^TlIA7OILbs^Hi8h$E|a@?2N& zF{5Q+kjPeK8sw_zKsU)WENDKe*U-Lr?8*ESt2N@|)kAq0{+Z093TD{DD5dvkKcZnuhSOuNa4B1nGnho@XF0efxC zbrfn(>E6q@wYB-Z?WHIPNo=twsJ@EGp2UtwKEvd+%=S#K?hb(Ye-bPng_$VT|AI?F zpp^HciFxqfS7v=~OWM!tIq@kLarIAiCX4YtsJjg%+|fYo-RJI*I(4&hC7oe4DuA8G zlYldNa7nq0zA=$*jA0aP;0qys6C;tU#r*E8F4|?kE>RmfL3rcE3uhgz0+by(q@`*x zQ?(gc6N?44Ns5V^K075-!CRdke3k1!SRKA#;m>)i9N}2?tIwl@uNp+fIVa!j}hlqX6Wdatk7!dwF<@ry(KAfbe{^eiX@@o z_s-lHZr;voO(H3RLd`@6tjjyU8yO%(D_(qmfWIlX^`+Jj8U*mhV6Veh2cm;ZK>CV^ zxP*0%)n#FFa0r0joukbo3}TvJsg*h5hFYeFwmK3rNabKDPJ!=~f7=iB2LS7T<2C{cUdKVn@J~8a9%1?)UAsYry^?AX zC@%5r=qb61KU2ogT_WMS7g@xTZ#9I{?`$Yn*6d0Q?WFQoeE4ntDh`{gdqvEqva}Br znCE}Wf1!O~rwhsg(*8LjPnI(6e5`U_lV0+zBaLORD!IDImmIyYv#yXg1?O9APG5QI zp+etGf#p0FMv{-kA*)yZCDmoX@qCO&Sxa*ryS)W6>?|rROGN+ z1~FkN$rmwTH7(V#c2ErBx>3D@Vt%zqJ@ZV{)2j<&6N*O6_mNT@Y9-G7ug~-J%H-j6 z68?lj$-hSy%${!n(J?eR+l2~mZ@6N<3+5OnWr9Ju;8|toDzLg-J zH#PT`J2)g27jfmS@SKPG*@?WZDOnfetyFwYVG%FWY^OLIomKlvv^k;Z_jS=UhyLud zr(IUl`y6%EZoPvc1aet#3b!P3CLpLP&q7pZdBXYCDWUmO^PteZjI_>c84JusoRDnO zfDgqI#;l%owS*CTe#6apD$dVstxNM6N{p+VT0^VCdKFLd^|8NsNdr!nMs|(6U${vb z#d<;l$LYMmFdB&(ZSE!yF4i3b(ahssVlqZbmR;Lty zAlgG0y0#j1Zv=AlTHv+6(OM@$rXxfT_9TwyLWBn>;GXSx*N9W=&T8vHX|w=*Pr1(R zFFiThWk6N*@H)nA!Tuh5a5Qv`(uv>~PyanwwFKdi2k`4gSwgF91(>e@Po!~a`ela=6ODwR;HCuQ(_ZBbA?rqQZ z*!>j`dz}B1kig8KL{Q~k)Y4|W@`YKnmfo}LcoSLhFR!Ig zK{9J3)2ju##0%tyQe2{cg&I`2{#jj`dVV#4i7(ZbrE#W$a~^HLQf+Omy$}%xGFp>U zRi9sg&i1liCh;DWb}sjFz+18=-|*25H+;srcf$n;ln`Nid^s~y?wHB23c2{Bq)t*1 zo9ZREqy#j1=1T9!m%jH(x~nm(2ZlAsHHfw?BJZA(DOep73ArUPLo0!<+8Q;Gu+^?? 
z$4p5^;l@u^p+*#6Ja&5?m~ToL$e;f?Xj5yIFPn0iIw7Co)Gh$8f9i;8W#meBStu<_f~x$^OUt7Cv#IyQR0INx??5pLm2Pfgx zKKgU#*r^Q$?&#_dtbXYI^T(unVf;key|?xtO;xserlUSTO{Lgg^hlv3{2 zY2>Ia4wSS)3_+b#X4pfP{ZlCG)ejSjBQ6+1oAe%-3}CcuOH32BRGx zeV8&Zhqf}`0zt5##re4!;X8u(D7~h5$iAFp5ZAvObkT}XIWkN5pq)X4f}F4mIHy6Y zgTNDxvDs!WW#2dq2J-~A3zI}>`if=t<7!S-7{1NcDq!TRNn$ekk;G}Vzq&7IG1AEE z1j9fEqFE*f86A5tH;WO zhU7l!BU(; z5P1++tD(&}L9&Bv%M^C%G&D3Y+C^gvPz@lEM%^873ORLinw!P!xfTp$-qT&SMTHAFnqQZeY*wAnjT%CLcBDURQIhA7oUfb(prY!n^&o_jo76ZP z=n#9G0t^*OdSj`-=rzJ&)Uh4-f>aOf0v-k_1d}@oeGnF}G1r@CQF9czHR|b7Ezo!> z!2ScYB?=FUq!_O!(SSQ{8H5|ztoyWjMaZA~QHh@OBV`vCH(ab* zQ9kgGgBcr%$AxBh5IBNFB^BjGC9F{}#e$ zy?@zB&Sx+_KHl~^k}$>$mxQ{Z#34L?bQ9^AkRakzM$q4Y6V|K-s|xXm_RzIT2za6v zljl=?8*MJS50qV3;lla;tg({8q+~PsFI@o0SY}0Odqe=)yxu zM8?fL?aei>!GYy7s|u)L@BM?!IJg#tTY!m`nPcuI!iDenDX_+w}EHuk>vV`0F-p4py=JvAm|Q%m;1? z7ytL~FU@wPzCM6c1(;|kD6W9~N5C@zsA9w&UO_mC+UWMSN3u&cXlsxW1BmYkw%iLn zt=SgA%IWFp%@E^r(8_p5Bqnwcj-#YOf=8SY0hlVbY>nN#<|I>fAn1B!WNpPf&)XA5kIk(-+?7_5H>0%<}Rb&R-o z3Iz=f8~M#;N%SjKF{!caocvHkB(v(ct6yvuvO3qpQJZ29IAN|%k?-+WsL}T@1fw5b z=ZZsj!QbER$CBBA`(bo^Cv$=Ywt!r7^u7}vwwOW@n{-6pAJNbKFa<-sjWQy+s`Hw- z0Nax!Y4XtyeIg>%4gaKJ_}4ez)dU^5B=YnChd#T2gginh@7GlU$qEZBw@#UL!YnbX zh}%0U6P@=t)t9tWgch=p}?YpS{)IlTeh6yJaO64K^j-ba!#MAyTP1soi&-~4e; z$GM-^jZSc|uqce5U6)rJzvw6I;UF4aZgh&dQC_j-Hz@a>`E4^@7f^mgzQnOS{g`51 z_PKz;wEfV{&<$A+wRdZ74{>Me8ew=t$KiN^K6;ph5CmJ+DDFV1z&E*xXQ{Zi})t88^D?poufA9A z8h#>=HKcb(IOqtoG~n&&0AwLS)!E2Ewb9!D9L?jB2jvb$payN=O@K>5iNJQ2hQ|Cm z|D*@tL@lub7K%nMLZRPynLtGTK6x58)HuhVRNlSUihYxiI1qxlxj8~2H6!v7LglH( zuV0%$t_{KOs@>(vL*1U2u%&+-CMWT-KKr@9hbWXVUiq{8Ybz|#`55oeTPhv7a1XZ7 zfJQ7dz>SUnFQeR&lBF1`7m9L@TLR@U1@lb8L46*JhW837H%8p%xiH!ykq~0Efudv) z(;0(R#6SNjr=X_~wnyMP=P=nFNRs?)4p2!nQ0*@KEqrk4ES^Xns)n@_qF^+G)(JZZ zw!XM9E;!znm;UoHdqirz=uwc2f?{Y@K=m3(^=A8vGH2)0hil>Ftbme0$;h0!1V*m0 zXA@nNRMB`77#ImiV{DCa+VGT`l-Hy<-m%MM8BxOT>ypdg`nfc;Q=puL1&kdLHa zdW2C4;{rmCXn4U+n^73so&6D!lt7*I<)z;#`Wuof^-kGQN3ph+ii(OpD=Abqm`+uw 
zOJHaL55H$#@cl{$^I1>^?^z-9U62!Q6Y5}A8y=AVY>0oie=|Bkt+UTjjZD2D?a z+pE7=d1H2Qw{WV}*rr#cVsrdz=+)Wv@bNPYahFbHScTLRFO|vozZU0)6uT&>YV@qj z$4K0ZH)oDc^O<(=x7s{w?dS;o9rnHD+wh#c@ttzxw-%{d#fR*Ojknhg?0SUD1|0AfBm>`2co6Jja>!8N>CcYG}3}mj2xQ5v(0ow?OY61HHqfA2T57SKL1mmt0Y1ok>cUq#B$#l~-a7lCP z6*tz?9bA1W zS7z5Y5a0~fE5>dAmnrSdIPuq@G=Y7!j4ylslXxS6_Se9LC@Dv`5n4(SkIj$$I7gAyOrkM>B8D{-9UDuf zrVDy>1ePZOU%SjkW^7T{KkI67-{6I|?r!t!w7SIR43v{PUP3k_isk*UX9^1)=e3YT zXC3jkx6RBxkiL9+sZnpHbOi~a+8FUtnM-i@eHq35q?+2nDM37B$Wj8c^CH(OXSb$^ zb<#wqRtQU|_L1Ipx98n$FId6Umc7~3@tRH?zTUgNc-R%}(TWk)o?MuT8w2~W%eIy3 z<6eW6Jh$DX9-aZ)_XfL=G|3) z!D_=RVxNul7RK;k`&1B}x3SaQv-kIvH-P%1L4*tBX@?61n&ypBI!UIU9>s)YOucz} zPp)HJ76YpC@!)$RmAvp@4&^%2={`1OcD^rP0;|}yd)afSHJoi?^KpLite#D^rdEL= z7kF4;R1-r=tG{>wb&1(!B|U@725uvdty%+5<&?u{yTF}_ht-O7e77#Tj1nPX#=qDS zRbJe2OJY%uy_B?bLq1keRHfVA|ACRHp7HLVghJ0*5=lr&COv>tS*yQ`@X{2 zQ%ubQWkQ99eDTBI<7KTgn!XeWy-8ZTxa8k4EgxCJWhX>tD(9P2wL8G%x*ws^W}yT! z`!@dF)Ax-;4Ybuev7NAUArn{AKpKBFnAc{HoY!;DEK9q#)EIx?%R6q?z6d{UWoK)X z;%Xf~vPXRlB@-t6$=@G(~UYr~P zGKQW{-zhR|!FE1ZVs{%j37Ghe$= z=N9tG=E2}+n;<57aVD}K!nmrX8=*CVOjlW2j0Ve^Dom4DqW+an!$CGa~x5#3?`iWu3Q!n2q3!n3|P%cIK7xpSQ zot@_yNy?bkocSGr_vuC_DwQ50P<7kQH73Pfu_MEH2l&oCiZ|}RFg~Irk&v;WEpfA} zKpE$pkR+Z&yUA@vnB`9>^Zi*O>{(kf>#=rSg4Y{`ORg95lJLqxf7j{GcQ>slwRheq z<95w6pBKc7yL=(_fAy&PFiawlRh4{yn1o6RgRNdzpeH@f?NTWko8y*>Ham0wj#ti} zvK2wtZ*Sz%{&Sn@z>Hd~#HFQ)&?_o?U08SRk2*(=9#sG#L^_1a$qaLUJQhW|a$cua zop{=fS3|1onG9d8_U%Q&&?c1!opbS}`A_Zby*LZ8J?Rp~`#}~Xy!Upt<59Mpp1VlP z*RM6Zora;BVPMj1!G>l}SukMPBlE6KUCEK{`7n{GnP=;(5tz;Vq=7>&!mjs+-tJ=? 
z^>0t77}*ulKhUb>4PD3Np)oYz#(>PI#)@pD`tTSM!dpfFGVX>#IWdV|PiSXXz{cgCDmJrbyPv^#6 zAOt|g&fsj5AmpFK{hL5gAkkdhr1{wfHdWT<-vPd4mBcOu$ZP@EiWs30P!p{ z{8Ny7Gp6)BTPbXQW!1ai9)GYTS0t3Drbmd4u$p5%@J%IU;poi`vQx!!51@<{z-sSV z&B|S&R|C*$=D`$RU+#xu#42abEcUVPU9H%(>2o)g^h})Gd?xZUFel<=l>OHP+H1%q zo0IGDpiXLTX$b`cvp-d`Ib;6z)9kg_u#n53wcJT^DP#!u z+1aZz2@8Ki1I2A;(77U&>Z)hYg^@|3zgN7IYt-RNEJ<2o+0Sc^tEuMHFIT1IH+v97 z@I^f1|F^pTe?QsiUi|;Q_z!!5Q4dpm%c;Hfp@`El><@-qLN>|pU)+;ebNeTj0T!yD ziqGl#n8Hu*=y&r=Fj4=1)c!N4N)Af@j)Hj2_*J`aU<9=Kq^;C7yq(?}*-{gan;P#` zVp8h<#rExrxS-TI#X-gwM@MW97%Z^Y@*oP{A=?JBb+g|{5C@2_VyDW=AvLCXQ||H9 zxV0lQFQb0pw~~7tx-i8B)V|$_bT-l+FGl!-xu2qWYctpwlnoIn$N2XDe4j zb()maB_;E74XSnBMt*96k%kS}<4O<4uJZvGt)EfH_5`=AyeqL)eCWMkR%9h&(_N

4t!*vE@7OJgwJj8spo{5%lx zK_W6qKR|xg0UWVR;u4Mcw+H$eYq>|b^}L2T43;cnHtrU% z8vAkXJ}jaiNMgm}T$4*)&+I+6AVf`>`0s3fvl*uz<-s~xFvSp1%Zoh{q`SiEGPyw*lJ zY`=ne=h!R6h#mUbS4p?%Wv!v*R@xS~9v9BWa5bvb#!T!>FY%~JrJ`}>R8y-;4M&p4 zazOvS8O*p4qeA%yX}SKwWCp=}KrCJ?Yq5aZ*OwB{6V0P-;2iNYU(di~hyU)O7sPFX zd$R89zaaVVw%tetb@WDqOt;n4SXx7&<6u(v^1@2Hd)8y4Fr=cGz>}#}sA*>DfBJK8 zrE{V}PiQGTu7HkdEP1SOW?I@#Yq)43%iG2srW_(up7@NCT=6vSl&r20X?6)PrXXRT zVmSKoB~n2;b;9}n?QU%_Sg_f{m*$+7d|}bsY5Pk?>8&3VKc1y_?xfFz;Z6M8dfsK2 zCwf6%VD>5NyVddUspIaDM)C?~%fYry{m#}NMlv!4*%mEzuLapck8sd<988D2`*5ZZ ze(dKNYp4BB0=uo?tOc}bopM9?9uVSfCrC+~3ueeRx{upq#N)1tIxSw)eEJ(KIpQ$* zpweNEvBct@&ImO#6O$~IiN+;sHTTPuknk_Dsh{*T*7+9IXfDI;fWuTD6Mqd9-5wvG zNb9d?&&yl-J%)doc35Y>QKaJIJ$uqqCxsUU*hGc&dh_+T1hRX=pHgKr2n3<#aTyY2 zO%Yro3U{)OeQ9UCH{vouwlXq~^rj(KWJJ)MhnCMF9#9BF>Z_`$uU@s+AY+@`V;oZ0 zC^{zC5H%xrHh*K%&7=7LxflD=4!OiDzu|_dOe>Ysyh?6`mQ5X+gWbm7RUYWs&K?sQ zR3Dn|vt0oIi$^Jz|N3AaK!B~@M8vrp_xep3v~k#K$gRLng9Qt+3A`c@`mnV>|2i&nzvU-s^d)xhp-sH!IO(u4ZrE(Mc%hTVwC) z7F$|dy*^RfzP7O<6fqahadTpB-agAy`~BU_d46pPAr|b8hl%o;TwJEql8wjC<(O;M zp&G2(QoVUPEcc#}_HW06119!Y2zMEvaiQQ28BkXSCgi<~Cph#V{+lHg$;7I{L4kS0 zr^L5ML#%!gn2PHx{1HsFtf!Vw1P2<3G{K?D!*dcO|ATKE z62qoni(QEj)d(ewBEe2`pJNYaQw%{InM!Cc_tXn^ za!HYP4}X~anf%yzV&_6)x1rk~SJ#=4qf9p$^$+b3|7qx7zK|bAM**1!5mIx1s)ck@>+sTkGH}sP=m!3iLI3Pz!$;b=F+jK3u0n zjoejH?X)ebK~o?jac^Azrw&KHVBK~8;CLX{5E=p~#({U{j#Dr{wdKRTR7Im%E5tNH zaZ`i#mLf>A5x`pF44EX7pP>eoDh3Neaz6r$wQE_`1OV^};8}mOf40-8^b9oh)|{;d zisc}9%5P_`gEqW-yUMJK5zMbU2t~R5D@Dqz8YfQuM*R7A51|Dd(_3fmC_Or-kPfl| zxxB)jsV~_9llT>pfnD0}+?FF-yRlEQn_u{(^z}Uk1J2XE_V?_@UQrLEx?Z0A8?_$2ibNlioW-ll~P zVxuZ3Y0E3$c-mMY)>yW(2WDG9yrn1F;_*!0w7h@QeYq)94VH z=fq%xpTSy9H~o>AQP2|!vGSU+Yts?FeqJzuxxdsZ4x%hw5n~Xf(bl=agE0o%Tt>me*5S|2aqo*+x<4BfSt?bG#g~S1 zWEv(9{2*j~P%8D>I83FAxCJV*90zY$icm%rBQbAZJF zX~z7IL(KOeY~9@LY{+~X3)~v|AZMgk{`;vj?>yd;Z~F;t1db4jOIjlNyTHdyX+P*y z4byU}fph^S3E=e*yBdz&eK_c7*d?lVKsFV<{ht5QFi6`V1NF`xF1_Bex_Zx3o_GKR zF?^ai5gpS=)&j6-qWiqtgzYFUJtqy6?owIc=eQ9oGL|I0Pc@U2*>iYl&7tQlgoA{urpwPo9dNa>2v>WGz7&1+SrEN)KoQz- 
zyAgM_msUkKKww4K`eO5EW^p-=2K6%+n1gD7n^J*(3_|}b54hU<2YmhC+vfc!jc6PU zz6v0v2R%h=FLdl;5=|edk^xl?Haer*@SWPgAp|kODykDoU@&opd~tW(Gq8hZEia@{ zC71ZGz@$n=&{H68Z15lstP0#g`1M%!b`SsxKBi6V4Yq^eGNo5f6AFAmCq8>sooE0g z*79I-OJEpRhFmD386cEnYdTE9!^Z2f`~ag(+q5?srowdf?u$TIeIE;IvU5iPE>DXv z+HbA&tZn6xsAVSVt^LyUY4NT};P#8YIOda;cvKs^}|PALKx%6!zA>B z&%fd%K6?LtKzjV-Cp7n+L%Fl~?L%nsx~F?+!irR*5+5edT*KzBTpA4UX~$A{o?;OH z=SEc_!>-_SLpp!XanaT?xIxaXc38m6S^O%2sHOwwNaLJ2TQ%&uKP+tH{HJ%D3?(U- zLX~6?kr1&CW??mC<_@|R)CDrH{49QcxQt9dg(5koh9sqAU?o6k#Ec-{$gh#-BcFbG zDoZ^xwzH`FLD4#rC`{4W?WstnU?EEYC5#YNqCis>B)>rndcoA`v6Mpf!Ty8}jg}WX zFTXx_*0B7c-4hZFb)<{Og|@ej$o9YabCya=D&FczJcJtdtb?UQj`T)ADh#jaefE@VyTH5-rzL;8A zu%!57TWGE+MrLV-sgpg3lvr`VR+Us(jJRcKc~%LWoCw^S-{m$N^256+4n3=NOw^ri z2Fdi!Dtg{dxei1(xQQjmAHF-Moqjn7N>K<;5`bVvI)DZab>$D5QO&~Uqk^OvV6ZK1 z;OK}rkg_=@Nlm-LZiG(?&e&$-(8>)4VJ}7zy9WuNh>tuJ@edB&C2ogZv>2J$f)}3= zPZ}|49}pNf-L=l^v;0NYGN4)8QeXa}w~AO=h8$F1D&jlgoI1sdUtmOE420*RgMm1b zVT9jJ311Preace&lGP8($Kt2Ocn|*~Y}o8DX{X$3mB+4#$_DRuX{P$5WLzEv=}D1t zWp;%qGmDVSaYB?ufJNJW)2GzEeoT>lVG#Z9zJz;aN9KF4M;XM->JSaPVuXjyIy z)Ye)cIw%lqbB>_3D&#kY#J9DAR3 z%Wsr8O5E8<-r30$LTPQA?pw$3{-&VW=|r(jF|gD0qQ~e8s+a?KEIGBys^!&~bb9yJ z<2;tvZ}`0p2Zy6NUual*R5JI7stuO=j61n#ch2LCR9|57N_FQjq|STVu9f`}!i%`xEi#N+q5t!_LiFLC?0eEs&gA^(Hk^f!LS$6B=Qz3WCfLQOL1drC-F z*(tCh^eauo)7J9lC52hukVn<2!QLQSa8jVI_+%kN8YMh5Zr9P8Syt-StT`mnF0ib_u)b8&Kf;4 z83D1f349&Fit|Y&GE7Re0+H^EOFX7ObGQR|%q|7cCUH^oiww?bQ|#BF*z~TdrL;)P zm(WI}%lG&L#A{A^@>JnS>dy9_^6aRsxz0Qz=`uD)=cCQbahM$fgBNIhe}?f9xd>HP z`UqAZI>^BFe*Q0ojPxb#=g%9`9V-3+UH|P%$p+X=Pk zRNNr$oDyb#S{NNG8fUbZoOl}=$6bE!`R)r>>gSI@DfbkcDsBlcha@<~Y_yh0}V^eu>8 zU~)YI#HZKeVWgpYg${D{fzzhdG~+WTRd0a@QE{NmhUWw|cQ_EAeW^7FDBoS}E))ZU zHx3_}mAYuu_w$InVw*UmdVhF#VLREi;l$GQCoPmTf|k)bOYHCt2BmzMJ?c%M=eO)r zg$V~&0L%#a5244EQ^za>a{sZZ2r{Hp%v_zyWIzXtS63?3ZfMIoB2 znu?7KcrrQTJk^xrH;dR*+K<|eg-iv-7PNG<1gG8JPPcRjn8=v@@x5^&y!%OxtmK1W zu~?Z*X6CQ55aS*0&l4PMXE4srt#a~_H$wyZl zhvxwHS}U7}&<-q&TA4zc4KNc7Uio(0c`1rpkA2$?9radED1=`Q208|9_8r!_X7%>u 
zMVDGJwVSKYbZXJnh>YE?flR0`^&1Nes)1_W{2$xG#8vdACDytPUa#{dd4uD_f)aqGO;|3uYUFl>dE% zPh%O`zam3LFP!ako%eM1G@E6D&CnX=9S1{BG)#ymcsMQe-GgZuoQV1kMy>X#kN$G# z2j(_F|7S{i@luywBJEmD8s#_B+1~9iZ*O`X!F?ImVrO^P&9T>0uc!?_vbc#Gupm_P z)=X;9XFg#!gZ&&8h_6$YTs^yj&Vw9hx!2RmEhXEchSxG096%@B2ibkFj2=eNN?W^Csod zZE4DS5<&kr@-lZ+(~irAjF6f$1-muZE=_WioDF};!M0={*t}r49yn+x$_I1)PEJIk z{bz}Brrf5DMR}T*WgVFH##Fbpf#o$W7m0CRA^H*jGdOV{*B9tJ_fbI&7ez0f1K@X| zuY`iyq2m0*`my^(c4R4xsr2^gX^(5y!LQR$@3;z zYJ9jD7)!?f{%(WWMifcc8%~Bp5?*xK?sEbgRt%qLD1!t@=-|;Ir{>Q)+*%wT0G4>PQFr|G0Z!@|sAGHF@0D9($rsyBgv5 ztstl*S3c+PJa2DnsjIm%_({l#ZvCS0CwhNeK=fD7zPMnok&8L>Zw7tJCyD#KjAd%J zIJa}{m|P`JF)w^i+^kiJx9K5oTxzd)e<$svo#Em3x1A9cI>=pm&@#lS=F4VpFPGZZ z&K$!lw;-ra5X&wl?AqAZX>Kt`E{+1dZuMWRHho7zwuMn1aV$EG$z7M|} zW3{(Ow+zKIMCG-$t5FU)MKk9@8eMq`j%>zx<#bVvSiQ+gJ)Ug-8ookixo(#{yr~T) zV{V-{;&G+Bpj)!qR#AH4ezm1&Y3@MK{N4EZ=ozAzx^CCFVB2e?QRFiZ5-wNP+5{-g zEu4JNQD%{A^^IDLP`A*_ z;qOYCGQTzrQ9~Cy`WhFTNVqFLt-L~dpBwQFgP&XmZO->jWl#)!&Wx)N7T&$#^p*bB zcD8}?R@Jynq<ekoAy%Rn}K7r1vyifXzl)U3JVeGu}`(Uv_!%+|C2+Mg#nlz2h zo6hIvrIjLkp5rTumT`I-i9=W08gl8r*HG`!J5$+MQFtgBvKqpVQBibykqo}1WVm-Z z#7ryiUI@?OzOZs~^8IB>wcI5|64AFV#6d5rK8EOfp5ff;-x-^>pxSFww-l7<^Vl7I zy|WVkLuh5?rwVtx(I_vyV@M(UjT`5|VS#_12(Fw)4S18!G%M7%!&gzu{{3`=uU+f! 
zH&dfc-wEc&?hIhaYP_5@;)q9H?lb~*hVm+mN{ z!L3DZGWsY9EJK$7d#Xptz#>nN#g$4ovcoc(c2&f1l^c)pO(Js_;?ZBeJSv@D z_C6i2p2=rfb23m%(B}6kQ@ja@kc_mnaE8CE9DZpjsIya*FA}jO{kQM<_1m{G0F~Ov zkn4d@hoot97ylipjS}yRqp$qF;lX-J=2YY3{hEfpY6}KlzCuOG)vV3?K?Z)?wi|7)LqcO7%`uJy3)$O4 z%lkO{O}H_AgB^(w3&Z@e>DNL9Y%MSh7xv=vA|vM*EG2ENwQ2Cq!Wen_|8rM^GqL0@)cwB#GZ6?P`t zsVnSP*sIPKVs=kE&zuL%_7kpOxRvXCANct=F!v~apR#@pf(uvyY)@|8zTK{3V(oNw z8TsBJUaWubP%-q06A-0Z2lmCOXyj`De$t-@krxL0Ipb)9Hb&@!PS1x}iDhFy_@DS} zVD>0Xhy2v3?_|6Aw-W|R?iI;9Y82>F^<-;FIiw%I5o=57?zANBV*6pO>JpH;Cw_^R zy=phHuad1VANuHh+Tlb0sTy&5wcQO#vop?P!zU=to>jE9hsbo~);V;vwZm)xSwPng zmS>X$Q4^#^~m|IAj$ z5Ly)}KI)(EN}hUV5l460?1Y9{kxRg^wNOkPP4Ev%G`8ign+bb2uX=Fgvq!)H+;;O7 z;1S)gir>m=`j|Pr<4Lrl!@JPS3-h`3?w;E>tdCbKeddMl(~|cuk@$PlnHZ8R?;i7$ zjVXr26__+drx3|01#V?}b%oa;xS1HR*nYS;6r+`Ttu9uL_LL*~jh`^YSX}&iltHnn zI#yfW7u@nEK@$A3e+f#=$!KZ)tps-NOI2nRx7U#+^!AOVzrDY+?>XPS&%M__e0(56GS^yjj`5DSyk7ejl)AQ9 z|6)bM5*nNzU>)?sEW;rXVrHO4&XHm&u0j)nJ_XY>QYl$_#5z4-Qn3@S&8l zy_hb|CQ4d7INdktqLZW`8rxtbmly0lGRg@e!Sbxfjpuy0RakI6=iV&;z!();3#xRfGAlE3~*Z!<{ zAASo&TuwhezvEHgx+84jc`GFPpm2&qi9@b=*`8AK9tNPIE4_5P_u>`b{csM6kP8sk_AR#rnWcDlP# za4)kJtc;1?J|{EWP8kjrt_-eZm<>D_EsPUZyTf=A0Gc036UwHUhUVT!!09s*1{R1I zBNlz-i%UP4bL><0n1~+iVXuE|Lur_|iA|olS$xT%L_)$sNrat!A_j)@BcO)xBuZ@J zFOk6#GU<0SVcGTbja!tGP`kF`>DrW6DsxLI3_nyV-SkekbIIMabyGMre0h(QK<}XP z>AQgx{w>=+zS?!C?s~#_)WtQ~@XxDsWMWtxlqK^m1m2|)yX7ezOpFXpg2+^P<+1I!56iL$}jsYK!)9H*S=1A*H zL)65Yz=fgb8|QS+o$Sn()bUl478N?Lo^ZdW)JYTfQF6$xN-@K3{OMC$LbjxUJ`h)~ zy*29Fep{tL!*eY`4gb7n=Y7V|k;JB&XI3E(V#^NNHUNzw2CU0QLB{(LXy?-l1)*P} zH|CK}*k8WMO0_xFO9GQ=10G$Mp<^Xno3+#!fvX+{0^j|R^Cx@#%$Z285(cn93_}DN z#|!k3NFHz0L3y9o}gY2jla+bs){&pvv*;ol7drdSDoE{ zHf>z0BZa;k8eiZ0rz{@Ai0{IxzzbA&C{{ozyCt_nF_UqJqRKcvE;!XJj-u9Rc`lLrW9}BtF^ESr)rXK2Eb>sBi!R{3=_dZ_YkFA31(&WVkN2`WmN#6Nst|Y}DVsO6p zI8dsEkZkCUSV5bk7S)wDdY{L3W3iV0fs$q$YU>;8f^J&{c3V=79|PSaof>SRdieC9!k47kO7=E;uW_Hm}y~vy9kDCyPmry>xI&y!5pG#Dy7N$x+e^qBb3Ld=$Wshjc3F$|O7_y3=OkDai 
z9?ku(gKe5c2T%*b?dL1gcrLh@z6IB+Y>IIvZn9vyq1_#-5$EAqcm&rkG%+C|!I$M=#xf3H*q~WQ(t>YXg%{Mp z=a<=`NQDRLOVnCsa4L&-9ce`zkJga7QTL~*gco@2!g~DKaCc$VcXyZTJL7!e!JFeH z5^09tz$+MSE+=3K-6~17gvm=vAIx`YX z=RGYGh7X#rrGz*H_c~k$*{#yk>?n*xOoP>$t44s7q zyHH5f$vV<29)@_n4nHK4$cB^foA;KpC=^~-RGo=)u_`k%{+z5>Y=6mno7iw)2Qr@1 zbHJUa#Gx-30+zf_;FJALD(PbDNP@mMpW*QA40{wEzugB%-_GqSQEM_*0<#xL#Px%( z>R`NezaLvcYShdRZ0t0Bym}dG-^NhLT#G{XrtQfb42G=}PrLqI)HQ{onCa(7G|Y9{ zYb9O-#0COyR)iMp&ZveuP5ZfBfwoQ1oRw%vXV;B!n= zwdA1j;BBc9^O^fjzx@0h(|=W+N|Et;oSZtlGQ*j5Gdrm~{IyGsWvxb_oDD6RQ6cKw zJm=oMHy0o4f2oN;3LhXt_;=iz2?B%g}d=4k9Ro6ozUa5`o zigVLAJTQDA>+bNtMZTfj|7d(FV;K$uUeSd35~io4;Sod`<|5tRUc0tvu@4feipOr1 zACIJ|sRWi6Dkc70|JgEPn%&t;X(1sguxyVeVK5 zJ4<)b&Z9iZd2u98ZZod~^mVtcR)wyV{@Mul;$OOm!Da)}PP=;M0+XFHiXnC1pOXI( zJTm}pWf_6jAp=B>2^K%98yBIgp{s4*YJ@=7RDr!$Oun=J@Zxd0bO+a`BhI$0o>r=} zP?EM}9ip*bY3{HYFju@7F%pz{(WTd;xom@*r&_^ca{2n4@^7R0dDN_C3mxN;wDaPk zUdN>}Dv&s+@1dn5>To6R(b}?rD#L?t0$VA-bHCS)eq888Q9pe6Fy6M$Z+4xyvIYc# zUnGsk^-BUGB2EG2t5r0A;LuXdJJwN>K*~I2YHEu7oH^9E@A%Li7jzABWAuzpKT4KN zp7KxuDwZJ`!YIp1^%np;3gIg%_ZJYm$dGsj%b9V*59}Gn1tYvNKEv^x=H6|b z-Ny0`^xK@=+zF|Yt2#Gt#NFZi+VOsPh4rzAj++1a+Jyt(U2d#x?)<&G=+426L`8h^ zT3GlK;mTX#^lbpNz_WElM&zAlRtf$bA7c6VPXi+EYIoD6>12?7-gFqK#p)><|GoK; z5vsi6I_DAjUs?~7k|RfsB(n)P`3tjVV|zXbO?=g0K~dM)w&;Y9yUJ*qnOrCo=luR| z|7_nLcJZij5+TN4O0&W3!x?9Nhmk}P1yrhH53PbJjE#P!9@y?L828?4OUc2O_-9%(#oC_iwJ{eSK?tQFG1`ezTA zc8Wb<7nw<#5Ne~B#jqY@qkq@=`Vs-SNKqgT`rBt1<{8JF=1cwj+zpl0@5Xp|h0mqq zA;Xh4Q92bp6beP66LnF~Bn-)pJUckq#`;2wlP5L&tHw-IEq#b94D^>bA-eVs2rv ze!m0Q;wM9!OHkiE*UmFvc9^(y>vQ%Axi3*3szWE}13wr?-EjI53bgnkh-7gg&a(=m zAdJjjt{GuU8l;Yzluk7eA5x(q4n800vo5(a5dmPXzP&b#j-5=6&ONp5r>+QB7x z7l|CksxcxtUg@?m6J(82CZ!84sBy zQgM9DVUL{qY8UfOw#V|#Gz1!#eDzD-BAG1r`nlY$G59pZG=x$2%UTw%eB}g5$)qASk1eohVT&Y{`}G;TRJu}pV=?j?&Y~;lmteqv2eQ?GRVQr-UXKZeD31aMh z{syMq+I#j0xv@~4e_1(1n}xC!r}FqX4VUDLV~z_a{Xcdk`mtqK9?fstKJ%`!Bgj#I zCeJQ%`pLgW6aOlES@3UM+Bg3WCbETcU(QAlEdJEJz4=i079(}9Ii2&!rd4~14XOO; z8~rNow}x+A>gDjH&0^c*ClLjqQ()PX`0W#&5}P0Q{X8mbiifqR 
zn1TmBxmNth^>g>CT8us-FIEnOHd)4f>>Zq z!962M6W*rUVY$=O8NM@Gp!|e_G16JU#@VgiJaPEtrPt5TkDl$8eR*r`8L5XUwG`Vh zy|HFF9{tmNH#S4gHgZM%E|c&(-n-zT@iS?J-;_snQ^qLkFF4Ua$5-vis93?D=u0}$ zGX&1Gr(k$>ecd$4J%#%70~=O>5^Avagn%+E9mYGbZJ&Q}7j$$Rp!{iOImP2PL_TECc#XGa|Lt$9P} zPvFjCz52UXaSg_!%Hw;5mICdZ-3Ay>%nyvjS+e9O+xVHUu}^88vfgGQ-}+O97`3MT zrQoo`iZ7Rmg5wU2z(5>@iZtL4bSL$TFPylRb8X?iZT{BWEJ@{a-kQvP#;4Ik<8Wz) zkO&h0FN5WW1f=<`TzAJR%6=NV=3jC}4spF6!kC|IuYC?^uJU*|J}fx3RceQ#FJpTF zRL!rLOK&5HZx1xsS^Cf_!1AmoF+T_qo7tI*7Sh^{RVc(cOU!c|JbsO&_<`tSJ!Q2$ z^hpSN$V1w?B=u>+&^VHKz5ZL&<3ZrBX0fL1hPXAf1_hv66#%^i_&`mBN{;94onrlB zt^E^xNOUZv3l636@Z=vrY%yt`tl@)rmqUm#15Hq&8X8Lv)Xru)yG;@-HgWnELT0qT z(W=1k9MVbapz$YBocTj+5zSigFJ3{?_rk{sb?*l2mzstgF$Re1yv^Gs8Q+bf+npUX zfq{g*v%@~$7hCu1I(goa#d$!uCUFtEo-ZM5nJ70J)q-?-GS=~-exs3$Myh-Cip>#h z)rYVilwt)6-``v;VLdA)E8u2TEF+ge;whiv5S3IRcI}~{`f>SSH zc`JmsZ2xy>^t5XTlfgAtZ}OHV|eRrrM$MgH|}y0O~Q~-ajVFd&D?l(SGCA;PVvItfVIAKyol45DxZx{WiZFq ze)cIBq=Wks<*9MAgXi(yN}%BK%39wCIKd^*ol&vJ$%sW`{TF(jfySgbtCG(%&+KyR zYo%^Aytl`Ky2_EHK&LbBFpjPtM{;d0nJ-GHbUT@b-MMc6?9O+tkW_z11&4tm215^_ z`yJ0ZtVPJg(MjM)$3pB&(i!$a-j_C%V{r>)(calLAXW^+n8pK6+;EB~u|*x?p1Fpa zYY%XGUU7Eodo_eRq-h zIj^P&`kRPpBJ<)wg=mD`Aw}n6Eu-KRn#!Xx(|*&R=e2()RO3vh+J4gOew7ikFY!~S zM=+|Udkd72l{U;5M(^$=C zk{$9F<%@h^EU4nTjQlwA%+>AQhD%E>1f^XVbiMK9;Qr$P>L@;wYRf8rhBeSiCz zq)ha};|sUsPi;Fybk#FId(Be5KKPJZ98Z%FMXYB>&uN(G=ziJFr~(Nr%{I0`RT0fU zd@Pb#uka$#PVDu^S?$5}$bN2ybYxgRcIMAWjx)|ulmRpHyL*vz~>vr%w1KnVDTiq9Mph>N}VrPIcpC`roGiu^O(&=%P1Ml5}p7vVxLt}vY zao$fD#przb$?{-w@e)V|W!y*J-%Z{};-e!`=O(Cd%?Mo)hO4M^-}*R--)AT_l$(*B zS6G|wS0ZTit$X(SjYwYYz zY*Lq=u^Ap7259{0Q<7R%P}qrd&jYI88_-)do11%S=)A)tE>Gz{RvO*sdFb_aZo$jJ zbFtwgC&+ixsh+Zmi|l9jg>LD!_ncRnp?I->N)=*b)|rn^cHPx=Wl z%F?*Mv<+d{isa^2;oz4~-}FldNQn<|pKTZO;HQ;jRPGjVQAVB31HZnmh}Pl4s68@L zf61MDs9tIzhR!19yoGUw!U@b?Jk*13E^ycm4BPUMm zL&`mn_35Th{{|_NY>JeV;;Zu*mBH+qmb33?Hv74@cNH%_oeyjON4|2tba#Nq2i*CPxh5rH#rWQCa=~QLC<=5Y5Hks+P?I}wu!DmXZH_Er# zmp3O_JX>nywyKBB%Xfe{T@Z_XS&*5;!()5%rc!n!+)g{izae9K(`*Qfz=OV2=mcP5 
zQ6ZnfJ+zGeb8b)0RWZfW@*$}(lZoa$baub0Dp|)y^aDa#0TZwj?rbKqYidpmQ>-Z% zI)q&r3r2P+$}SIE(dXK}%Q5hxbX7u<`@{n%{!7E4%aU_f_k@`7@!}P~YDEmIjq@T* zXNYb}{@?=A3!11LE>xH^$KZ;|-@C~5V9BdqEMk6}gWLEGwZsdMnTaaJ^Wu?ko|mvd z*=>u${b3RrP|rnSAf7ELRkIw?JlFRDe>KVHcCF>u*R}4EO9!- zB`Ig(z+8!#B-;Qxb01`KGaJ7|hi_Vx8jY>Bq^jM3E11=ruQyy+SZGB>>t?TAOt@b7 zzUL+J@pxIqoV$0+L{8v4++5|OJV)(1nwj|&x@p}e>eJ)??JQ}N4`!=^R%VWwDjFNi z_m*1~&?qTar}}mfJ+)mNxco?4^@}8E1$0iL=10CD#&bn=;f^JUL12_YdTF}djWC`X zAGve!vyVNQN74hs9iUYY#BBLw!nvOfo95<~L*$-6g@lND-|J45LkC}df!2UD{W(#>_))4> ze$kNRZwQ7Lfjqt2Cw4L9ioY7ZObts)3gML<(S+pf?GSK{&c1kluFo!WI6^E#X3Y%D zPJn$-?LOI^Tw9uG?vvAK8-u{EU>6qVv(R2oMAtNE1Q8+yqZck-Jg;!%h2K7uWs;#s zf>1=k8i(<74A$})*Sj>HJ>0u%YM*hjX{33oiVvEYbDui&f+pFMksLdA|8)3UBNRn- z)6tKJy{~;elq4<2mCHpvBKENsL(_IPx;8^`EzjGHFLRQXNZDe*v}eS?jXzG5cmc9O zwoB?OS>lLR%jT`8%+AMa-7sm3Y^v>tq)P-CcyZzBu~_lJI{H>PvHR{uWVyLekoMw^a5yK){o8JCSXg@)T;s(_j!va>dus7HEtnt8zBTZCw_K!>fep^qW})RNexY1A3GMcP5a&jIfIxmFJ>plhlcxKHVg=FGv7It3*m<@?XM1f!M4|$3DD6wP&>*($i1D z!J9(gQAyA?99|vUA&($@Jb571{3@#rpoE3|=QI?$7H;wXAZA;A+ zmm3w{RJ?j^Ytk>y!F#Pp;+7i+-^{WO zG~JeB;J}dT1im2OatX0!DlVFdQJma@u_uoA>^~;gB5Qa-Zm}s1gv;Y?7DRm8)T~|0 z?_mj(aI9LzbG;D$Wz1m39812jO3WxT8-N$5M?Pc*xur$PTiy5_ zpdQbZzG>T6;6h`Ztjytmd+n`jec9tY5>LP4Y+X#^KuE{4%j<>9Jn{}59kQj%!!GI6 zos}>7*YKdBwW(%O8o1i4O$bmNEQpyM25)>6Psh3DMIhtbfV=puzQQArgelYo9=@{L zjSET&J_sE%Kv?)<8nb@oJ%5vb;dUs4p-QF8+p%+3pRN}u6kf5-X)W5>ae14sRxgjTw3*sH9O*7+w&eK8NmgT5ec02vMU{4k>S;6-9AY!M}t z-Ew!#dwpsj*;MH-^ss`CvsYaNcyU)qKU+wuiu=U<<7=j28HS4sMh|2#I|i!VxdsE= zpD^#0il`%r1gdi8>3T8RrJGR>)k&!LsrTZhcg9YsMJDic&l@?rl`NF2C<@olk6n|W zyw>laC~Q?l9KXKy-m!@(rvJKPpW8spv`;`AMQ24gvvN!Kc}OacM2cNW#M|q~_wg=& zztd-|-dnuF@rdh7$5fh$!J!V+bC`X1Hns`!btKq;c<2 zec{KyNvL|DLsA9g-$rR2&#sH(d6-t@4UmXHfs1xWwpN%Q#YNRXSS=+y=Ei2xcxmaN zp=C8~hi`(=WOv@Fn2F-c#a9-%66!X|l7SmB7u(9(0BHu!0zcdm%81QQ+KsMvhV8jG z7GomuQU48Lb7E4Y312{Jpx2?hNr0aW!MNeB4rlzx#8|B_r_+!J;=;q|o8}I!Vt${o z9mm+qIQ2WqUA^bVPa1&~^K-GiAx)6Y!_2k$2&UsxM5_%4juw1ib9cxaKKYs4SpLTo&1%Es{9I87;r!Ejw1CTm|-Fs}|_ 
zdN6{0)5-Nc%G`k*0Rb7Us}pzB6-2tQSLh{8zG~vwiU-V7O5TOG6u%e}84DX1Ql+Ix zkovn%t-KxH$-;h$wg_bUudmR7B{?y3zv{xn*)*d{gjbh-QF`7g96YwyF(|Pw#a^+b zg!xjHg?;K#ixbDVx>Y~!BgnEf3M8tnVAk0f&40qC%u290p z`PW;2`?)Ui=ltilC~6+>ekP_QPrQln5?h15mP?U5q2|RYqF?L&w~$}T0f+GO z92%PS)Ge^qgnpg_N-G`+4Ng_YokbmA13#r(6Rfw!cc>Fx{ilrDHrDv$N&U(w8|8U4 z$aLyJM|*Rk@w?1%he7&X>X+!h!0$Ix|Fko_-n)kqBIU2`m6hf;?c9ZRJ0BrZ)ZWta zK*(75xSv{~T|Z0O?1x8=RZ{JSyHcoa)>TIfE9c7XT(RTLj=^zh?lz8G-r2q{l-jLW z;MN>oQNTCk*`2EB6bpUl=PvPM$993k zws4C4Z5Zf>gR||Ltn1jhtH71GDiD%IgSehR>N<9;b`*F+B*#=+JK#n*RnC_(gO!Cv)eH#aN#=o%Epf9zB#{Y>PtHbz`($d1C0M0jX zUxmN>%Ld=v7gC#+EDM!$3qRwzFsOm=pzjf(w*1iCdzLi_N(C9j2X=z>Z8fk-=K=mt zR@MUN+^Ke?esfo+A~B>*5q;@ESauC5;G^3B`q2pBqKfWqTkub3Xk=kyxRsJu1@6~Z zscv2Z6Du?-eCdZ3piux_<0@TN;1V_7OvsQxZ*g&oqj`;{bXo6^0j&Q}F;h;C5(55b zINXnW5vrlPIB&n<*MskdA2ISAtbqMjy*C~IFH9{rrBE@-&SlF^!SISR?%dBYBAE#H z(&isH3JoVN{30=>yFWM*q%CIN&;wmV6Z%JJU4is2!Fa0YjeY~`HI9%}hVr8JTFj?ah#lbS`HaPk3ci z-T8V9iH?Kz^c1YmJa~MhI}g9Asi_%NTv4=NeBb0oBh)f#5T4W**fEGFw`4(mUs8&) zSCOvfj{g$2;R_#GUF9#-lLpHJpV&1Rwz5>6_?90>k%w5Y*AjEf`XmGDq@qJ_N0+#& zsvwBT6+0u^ZHImVilCDo*a;cvI_b*$AmJIqpV=_g-lo&p_9VmsT$N+2*^qG?|0s$=g>RS9e~oTAiLvAU ziEyjBB1-Xy2ZfG@(umwl)si{V_+;w=B-#KzS*{_+m4;eq%v8yWmB*gJP_e(rLE3rf z=e;(Gr&wS>PPM+K6u_n$;#y?RpoaqRe{gndIf? 
zq!AZYJtgV8&Q9Z)MZUDR9H)2fN+F!uq3g*F6eC>t6Gv7o254>YEB=w9h>wCx=6$H{ ztf5BFZlm98egwH^Pq9WzSyZdOc=5t@5`cn}K!Nk#7jzuI#}_C0M~-s+!UUlMsd z-^W)pELBiYAS$o`KSU?pcn0ao3eT#@cJ4zBqeJo{_;R9t4L*tZ1B$ktxo}|@cmOf` z0Zx$J=Jpfe(n9!(th=w6|BA~*QJ+djg>N6&y$pIXQa1Ya;apBFFfuv zK3*9S{^vIMb4%Kqm$`n|Imv;ON3bOPE`~HUmwpFK0=DZ-JRj>pMwu-b{u%)ig7t?h ziHCXVLlA;zzsk2|9S# zqK!4%#qS`0qaQyq)I?}piIeL;-?|nCXI$<2uYbczANWje14+Tctog8QRAmq<)~Ts z6Ejt5XAIWh&t35;!Y}u1{8@=JEk_4MAx3)8sPHB;oI@i?Cdz{9e!M;A{p`@t74e6% zx*pvfrmWG}C?0xUqHk~&_pL;^F*k<;48ai!$j`&?r6#*_9;ccWNPNq;3Rzj| zc^AlyXgIG97)2Sl{e#525O!L2douWqu!0TK0l>s*BD%8{<6U zww8Um2-@)f8MdfDNdRPc1Omf7brys%Dd`}D=?kce>9dDs$)Ru=f^$JoA>6|`d$0rG zY57|nba0lO1e0-_4^M8ImJ(l(Um8mLZGKfPn(1KVL(6^)#ry}xz}SYS3KG%+3D3q6 zXdt?CeFh^#gKV8(z&U_?z+Xma12IBMu6JX_4H>!=iYHQrtyCbR41hIG10a$SuzK5} zudr!-=0bIKHD)eFBe_h>spPidka~OlMpS0VJ0%T6wno{PN$)6%JqJn;1(eD@8~D1M z4753P=<$k{mYrh}|KJ0v^U$it& zRp_GQjs5$y)=wR0weK;<-DTP|ccb$U^K85Q!~2wIzC#=4u;7JyTZaBatqlJ5>xs5@ zbhH{{7LinGsp+0weCu@N=wu`RFaC2MkBiulqcW2{#;+TieNXbeqpA*=#b7;p8Plwk z_Vta_m-n7vp;>OM?!;ofB&0Rl|LunTN4H5#U6FgY^vR`3lN(MA|7bN8Z6!gg=^QYN z=@xVFr^Eh-7=i%hl+l~T6HekHaEPwbc@Ngpnm|k<2o5eNa3YxXN>~R?#Zp|Qh=~94 zfsfGwF~cDb4lzf>mW9$f6VP6^KaVd54888|zKWj?>)(vUt9jWsAFdpnbicpMRZ>W8areQ-ko<~#oxt#Puy_64butB4V4H2;Z~bwA*02)VVf_8(#4kvvnH zG0;)Fx@l6r>buO-(e|w>kjHv7_L)B}7Kru2$GIMJ%~LJxKNskVE%$HxH$5 zWbew=7x*7|^~<*N(zkk_J-5NmHgyVsaMOs&v0Us6o%3WC$3U^}ZXD4C% z3mJPF&|zI#B}tPb zNm*s(Z{O=CScyJAo`>Z_P%liU&O_(Ev()7;tHR9JL1O2BWmTk~cSgk(Kp6;S9Qj_H z`sz-Y%iJZ#*E<_K zUIv9oqyIe~?MfJYiY6s^THf7Ja`6}AKP-Qe2)WYkUo?NV?$1k&qj}6&y$g-N!Rh#L zooV|r)A*WRp+x}R>|l}iaj6-yVIAo;@(H*~56twlI$#1H2M;%nL@4oy_8+r_Yi|TAGSoXts4z`-Sp!=ct4->301+ zb#ob4=*Lnh-We^lCU9{4QU7eD_KreV_}f_FnDT?~9^NCN{qj|;*v}HZIG|z?O}g6n zxOZ`^DMz*BD98D%yWpD}rOGEE4afbJJ%0wnS**+BsZPYNc~h+W;=f#4E8iRX4%Hr+ zCebe@aPQu5c<$u^^2W2HM_N|Tub1nF1q*GjF6UJdU*bbMnU6BggbMnIsw#j_dh)Kc>VRg7|bqF|LetqK6NU zG4?+Q4Ym3;)-2j8=`t67j|88!`fq)l7EOIx8Pw+ed`}<}?lo@E(CF3zK@3)Ko{xKq z2NV`ge%^UEW|DVVxm9Nf5(j1=={^s+LzIW@sV 
zeYEsTh+#fX_Q!1d@jl~I7c`k=wxm)?*~foh+)usUlTm-;Ed>-h$fV1CQ18#o&J#XQ z{G(XSBQTRVg4oSZ5F-_xhfe;6ji#%Q8vu|Ua0>q5%9)YqnP zNitT!Ik>o4m{l}_l2*qovgX+Eb!Y5!?59FuEq!$5p0jCgOxresb@OrIV)NTrFs`<~ z2CxmjN_5*Y_+vj2&$VI{bFjEktK|)_Q=WqrJ06m!^j}@rN9g)83=ja)Q!F^GWgbd; zE-yIY7oU_Ao^!S4BTz!G9Siw#5l0E!8o0@`J{qTS`A^NZgmxXy*02cIrB z=Mj(7b;<`bm9Br)Zr)pzCZ+=)0nf2r*O=@>QizE*>D$G1xedrs)z zYE{41V!P}^=wXf*$GVA%ae)FNIDujP>sB(6S1q4Daf z{9BvsNou|d5Pg!2!i6W%uLCVWK9osShw_Ull^<-uKSI);Gel-6!xt#6P(2m6QT0p#lY+{QOF$rs4~0nri7M~m z7Kai@j6DrIip^W9i_mm$0RyBTo(*CALkhqD;DPe^h4UZXTNnDOwAue%x@if%E9fgM z@cDj$>|@3bkxYvCpWec{sk7;`$$brG5v`d1#N>1^E&2~# z>K$y2Me&{M1VQni_~~4e;$1>2?q_EwnTDGSW`~z$2|AwtQH^~B*1|kgxGEU#K(ZKs zCF(k>V>W(eKO~Es`nToGot(QTWc2@~82i7-QkzvQH+cRGkN$59G2g?zU&^rDIydY# zw%x7yP`sc(uE8Efkz&2Cb{~{e-cDV@V*KmTd`|`<`tPfqV0kGl5_XA_etU-POM`Nv zJ7MMV-srWSjYG2pZIOU*+0nB$%g=_-oqAz|9d68S9GP8SxMluc^Z61tmL)aTp`ih# zZM90WI3&Nn>(H91e((QbMszG=)&hi8%RpX~b^IoOcXn_iAjSOXBSM~m=}CxZXHsnxJ@%K zC70zDlO7EJTV~qC-fyO=jRzI<=Mu*oZt*Pr(;38~)<*3qyYEYTC=DRadH}<05;CI8tRQE4_cT_|~9mHeK1H3jmHdOf~ zDCEnO(NSHmPZ;e8nnF0pP%dU#AZC@1`U{m%LFK^C;lG8}1wkqlYnd=&qKz&AH0FtN#+v9lSNP&L|TQj@Pxif6G zA~T6xcMxnYwFr>co0ex~od#7Q8L>{*P_^!99m%bD@WS>vP_mceiIfLls zMP{PV??d%~#0h`AuL#lH#Xr~VC^PUDW|Q17dTdy8q;^)B@p;mdu~ESxAQI)Fdx?{r zmKMH{+WlNDBz0rgPHbC=%>uCUn}^1Ct%RihqeKOIYG)QuR3{PW)iDMrCrJx$D`F!m zfNwknV4xkG_ZNVS4Gd%#=Zp~4D_LO7D-HKZGVqg&^X9VWFJ6pP*KZKN-Xp2TfBN)^PoB20 z&~%@ft4+xnG}vdxt|?g(V#^ROOJ$}b6hE4-+i(<@v_eF9xO0~!qm9zZh8tA5qdI?p zqeh#;do9CHKbLuYmfrqmk3Qd0#+t_V)g3(EdA2Rl)sU%UyvFsJlyLE`{hH0%07xSvAG9t3fOVarq5Tn^hC#> zYgWQe9L#7eL5Bzp@R6q`ZBBdLLZ8{5sD0*B#1U1cT~?a$;XA|I6r~Ry{+43FR!48u zJ=;yWv9R7Q9``WF8J89w^mu%SNubCcX6?j~bIBja=maUFO{|q~G?9gKJ-Y1z43zFq zd@DYV@D#Omk(KY_O-g(8Xygq&7>js;YQv4;DLRuc0l`fs1eM+!yyuq!Y!#Jsf%VGx z^lRv-ohJPBz&lePWEfX@kd@*T#~MuE2q93TgLiASa-G}EaQe!`*Ecp#0A@Gd2Tqnt@b<2 z?Vdv-m+0v~E{51m5oe8KvQ<;=^--IlcL73WmrG-HWlEx^Y0fRaDL-)Sn< zMk5`Ur-!_oKO=%G)*xoEhFUutg|tpBQW!IkKjyz=6jb%%fV0SOMxGHB2&!Ns@Y~CA 
z<0yhRga=bH-E`0(&I1@l#h77aP_H32H>Lc~wLfkF=X=&^;2X-`@By&3xosA_C(d6oYLHS|Qn| z>FJKxeVGXfZvCe*cLFBgm+rlL7+RVZ7mg4redEdRK(^WjB*~WdfiNlZ0v(NRx|h94 zhS>J)+mn=?Y@2JpgJNf}iw_S*LAFKMfp1-Z<_~9DwgLK!ddujeShlw>UnM(3eRE^x z{)U(3;^;Y&j;#A7>yAQ{b00=++k5SYqny@i%a1!zg{X&3;h5^G{qF{N?-06s-!G%a zFG=)OZP&oAaZ)xHB&*4DN33HB`v)>&n}9609FSNw~j7+7$Va=5s`9YsA%bamBXmRS&gFX1StHDZzy`>=FC(*+l=(z89QK z)fVoUq89jnuN1O>*c$gCD{mO6ZqaCnT>ZL|H8dadzyR5R*(^ z1!i8T02W_&(@qp~dn8=|@>eY3Ip zeUp-E@iLk}E8Uob*MoyD`sXiARQ*sYJ7iMTzl$2^r|uK* znae+Wscu?hAl5ys8$f=;#Hz^GhH5c929-gw?+8OAcc+o6g;}c+3$FP-i_8PElApKh zwv$!4P@bg5+;p6bZ_gC@S3`z~jBl$0E_dwrO&yAy5M_2#xU=*Up=i2uZo0%t*1h$b zU*UHyu4|6lnz%eiziXI(e4}x8_$MXfR9XkIHAAL4h2qhPlctOP{abI&}M{S-gCofT^!V3GA3su$v)1^+u9 zyrKiqa-$$SJ`I3}5c2qE9kxoaWfWJz$%tqK2ufGZ1v6WUwrzos21EcNN#&@9Ux=wb zqf`Wg0F+*PkkA2`bAtP47$O7wy8(o!@uL`D_wyY!D9UX;5O7ba!qsNk#IfLoCC1in zP7Wu@bm-G|c@#c@zGm(5;%Hb+V7<1z#z&lnM%;7CKt!uX6(djV?1LGR#roIw>e{@g z=I2|Qe{41FCu>Yxmms5V*K9q>(+sbnR=u$pl;Huw7+6GpAfd3MHO4l z?+JUe$6yC1J?HXNO1sQjN?(%ifGM%YYE$ET`O5NYc^>Du$b*Uaud-VSD@pyHQkbRm zn7$2+jX|dI^5TTke3Pxk(9@Z`q(J+sH$Gk)FM4EG(>{BB=ydaXvBBl_hIAVL`l}j# zbV|k95ob@bGdO_XCY9@Ppe@f|{o8e?u#cZ(^wlbrgExZXAH@g%mY5W|-M6P=M~&d- zj?NOj2C-E2x_*1gM_P@B)3)qcQ!laXjT5^;USz(paJ?v_m*0_4s|)SX~1d`jmH5t}ekZtmx11mBfEdOi(7K!D*4BpP!MAq}(RF>#^E!S;T{wg2WqUpJ89ipCa3pm%@6x4NW_^FFtdhDmOdKI}c zymKaiRLrQ%>Oo1Ax?1DHD96Q+^z+;W2?gO57E}qb-oB>l9;XI5Y%-gCU-qK8VqKdC zHaW^(y^On|S}2x>S`*zI=oh(igU5Q~`R?$GG1`6?f*0CwpR3QRm<~?Ku};hL#(DVf z%v%iL6It{Zv7pHBKw(!HtN521=cD&pS}D8@F#NjxkThS>cG_w?_y^hzr`p%lg&F#V zW1gneQO4u?`^m9((th0R>1!M_Kku^7-122&EatzTdV?JOPQW52KS*YIWpz0C8_zSl zkc=M&c>?9#KYdR9G<}IPHh3uc397cAefh7wH*I62P_1Pje!Am}HeZda~#< zx@9XL?GQd!>pY24jum27nOm!-%#VH$k#%3YHU$QqUD4N6*JwC=K-&U-xid9&~>;vUvxHT$_LytI2C&ZuI~Rt!M8&t9VN z$y>sVjd{xXE%OqF!}PP)Lh))m@^PWNQf4Hsb=D%68OjET-n59+rXh1V|1~fK}e>2O9X7 zAVXT&x6W2$^kq1@fiO;SdQgl7o;hhvY7RYfw0rU! 
zsxbbxzg{zeC22O>RYH8XhcCC>7gYRMPS1A*t179j_gCsy=!#$58Tlou_7Kre%}^@1 z7Oj_wVGkdC#Nr~B5+CfXdLO?Skmg+A=x)|K^0)0s6>!pS&ey$oH~=@MN=UwaheY(3TkUpu*P zS2d9E6w4I%JIlVIw@bDL)Z*tf_cfJBF2B5Z zg_s5$BD8k{U%d;yjZ*Bpo#x8*yKEu4N_r$%rQ4e}qbjn6DWN(bV&2G(@V!bJ>IBn6 zhq|mP@yTvef3um1I9}hRLr}GH z&fEg#8Y8!uqsGnqUR}|{kvq&blm9_({5lTEjc=*j-{HV?1Uh5XHE)6+q&Ey_3YnD) zu&HY1@98B<;oLWIZI9b@V&b3JsVZ%qm)x@Bk?DwP8t?^q9yRV|9*4i!@N$x#!GfeF zRdUJWMs-Ow--F9aID=#BVXh3SOfb{V4rh<73dGW3nQ*pp@9>IUf zo{fk1#H~NoSzMr`pm4>y3f7e3#q(wJFjS@xr35n~NteRDPqA@liWE({-*29pz2CNY z3FwAN6t_PG?w2gG*gYZzV-nwu?_bH%{l$*w%Dh*+{LCE9K4>?n`@!WPe_V1M{Wm>h zQ4j)h&6keLJwVL#yDbN1dXjujPEHnW!=QN%C=Bokxs_))DE!s+hPHO6H?C&dCgeEV zWhfY52I0DLXP~YdDcM3BySet=JJ3oI{HX9(u|K~0>Dx%|McXK39uFFssNQ#Qo`(Z7 zLwXjjNA^}Q#r{5!jq);9IHFOz_D_1oWIu7l^78#O1}T>_2z>oueBFeMuOY%_)+Fct z(j~5)wc{;ftyhc>VBgD1#qls^lRxkDG}2Jj=oD3-t2%I6pN+0O|z4b{rhkSEiO*ySdhXyh@mWxlpZ z#0~+tPZm96MSx`DX@s zYsn{%zb6_-F}-d?QGtW+bgxC1<-^%;Sc`?w z(Xg+ieP*2jV!9vv`Q2KUl(ruK+VouZv5OdwifA!5tJ$}?{8eex#Z^=0?a3c1{VGK9 zzt*(!eT?wZhO9)dw!D0n#WqbYl#Y6(Pzsp0=B1WD?`S?kM%nr-&X$UfaVC*BI4&tJ ztf^NCE)SE|Lt3#b70NRw(W#G&iL;AyorL?C6WlJEn4{}gu+wiCwP=|3%c#9i zY&@UYjQ?0*d3u2LkxTsy70Zdpx28H;`G#`$a>l63-j@@v+sRO}$UHb!Pe=8Q&7!ZC zCi>C>(;S531j;7}`E(2Fe2krkb$y*mj%HDol(wsw*#{=K$}Cfa`t}m)TxU_9aOeO0MvD^=MsPU_O45Uy zh!u-*E>TX3xL*tCbk`ucgI>EyHtmg&R+ag95p?E zE4L2M84^DL6g+PACWF{CmDpk?xW-Q=&dWcF8~wmVLh9a}acSc+eBY?uOghp@on(dS zas}Voj)ibrSn2;<8+X^U*E1GfGGual=R55nTZV62t$*5$Hu+MX50DY6!xjh zE%ru9EjIfPwZwVn7~h}8^#6T1 z(1K3D)X*@Porrs7zPwwf5l%Z&&F?^eCvBg##duA=VdoNc!l@9gh|A`CrOE@;wKQ{# zQUQ@84(%aNFX7L33ccvw4y6=`p7HiW)#t=D^0M=tCRaQnY7`9m6Z!n)`C|_2vVIzw zmamc?b*X4aPfZjC&2C_9(;|$*s+Jd~tP17)WY$?mLmu=VW>!}SCm$!@JpB8)k&?`g z{H8nKP~nje@=L)#Ccna0S*|zCjLOHJ=1^ym`{Kpm#NT0@+mBng z+!H`IXUi+Rnon`_1r0A`{f!osN5%&q0FLC|BhfZ}Y@2(~iswIN-^&MDh@~bJL_3^D zX^;S2k#=B#82-1vrw-=Ym+Cu-0srS`sFHWYVfOinAl?c=kqqj6U${>b3REv&jGgf+D_263&gM0`zMGhB0JR*L-CfZg;E$svE@MHg@1h2!){BMD z$xGAAHGF;#;s8jY6h`ilhM--&+J?V>ddqcVGT^zBOo7*+o1I<>TYscvzwl=wkpN?y 
z{l%U7GlufdiHfctyX8Xq?kHKy)~={Mg#zH>yOZ^1Z7P{JqRwLnGp zkPTVwXqJ9t0;fuJhG|9C?7%&~X@&=VlKq3Pj&a5z)f?b3yYe0io5;xRC z2+cn%w1fN#OY;_^r&qGcq%GP2w@2KGxoygDzD${C>&Me~kv05zLbjy^4Am99&TNZ& zi>8@_+MVyIYN@L4&Ea)LJt$9(tkcNP%N%ugFDIk#t2`veX`AA7J;g!5bHehJTN#P? zwUw6B-+oYk{rpyy_g?-uOOQT=NI++AhUxJfrcz4Vhk5loOgQtMSJNv_RUyts8S|ei zysT~>&vxi)+jzCu_+2OC^M31|q~kA7Yy2Iid;9NYD9^A6(WmP&NfwC|6-c?E7E}EN z1@^v~`4lbE#9q}`3RhU~DG*{jA((?`^)bw+UQkg}ubf8PPV0Dq>D*ln{w?*?A*Znk zg}x-y_`$;rO={q9ZUpC7XYQ#mMM|v5A4JM{Z@w`be6sS$+bYXBX(t)#_L8KX3+~rv za99NrT}RlZMA^J#D7Jmb8lAm#rZ}Oz67hb`y0siVdX%`vPpjj_r4Xz)&QBj<4PtY! zGS7ilSXQ^C%r7h~JOJ5=il6fkiZ-0EayuKGq~M7oV||xcQu};Ev$tWRi%hCqU~7t67QJIjPs}1(tKnQAZ36)( zVM))Kny(%#>#27nGPJAA&>JoNgocv+ty!lJezR;vxQd!phfchfhUk~1 zAee8Z7q>Vi`Oa@RN{#lD{Np|*K0KLZ;zU^+t?O|Ir(Uhh<4F8*_z-=_^W>|sXOo5t z|Gt17u7nF%Y`~Y{vixbOeXwwM{yYJ(6RsM1lD1!^U;esv3Dwy{@)HdS=QKg}G+D-f zUAwlOQ=RTd=`E+EZXgV$iVs!*zrg-v=+89>K@7bxpNt&L+PXu}r+XiWr5?V zyGT&kD!)U>b?D;zxle?Q%Ah2IiTG=KNqKg6GX#~-9+-%kyC=jY-4=9ZS3h0a1lLDr zE|ZZjnLcZ)vCoQftdVSyA%jn@Am+BEXGf9Mr0$LLdmFW;402xYkDhenD%E13G62i` zNW=-l;DBl*uhB|$+p1*Nd$BK<$!%#cxL{c`!FP;2>*_W2;;xmV&cb)V@SWAgotHhX zuBs~1VUZrdyFFBH=P+D7PcC>4SOBQEw{m|Z@^Z11(fM_|{nIww%8VLqvq>MY)6=bS zl4oV@W#UwcYQlFbEG6pgfCvA-$%Se{$-{afkz&`}CzixWKe>D0QXZypTM%~4+FfpV zamG-*!1ww|1BvNcJ{h9Ke?tq;j%Qb`c>IiA43fCB#V~zm2m2nCU-{#F>xi~_@>3lC zAJ{_ms9XOZl8KIMG}+HZlfOK4Xn*yplHyG#l^sfC7Nxd_Df;>e6-zcthNztl z`t(zKT^q^R&o%NjSszNBK3}{yKS)85vy{*Et|q9~f6qBWZrhDrK>nnCvg^mPeW(2^ zYX9L99aivgu6yRntCiu!e=}cq`hlfW^5`F*X#an53PXN!<`JzMl>Fr)rthk%)R-7Q zOiG&U*9Wp+(#A6IwO-uq$wDZKg`K;#`@6@%1S@fs2|@^q058;MK7bcul{aPM3mkh4 zqJYnTf~XoyGKjLh46F*7*-)7R^+v?iva+o-67;{ji94qOkjy+cS|tH2b{te~><3rz zFkyFonLCBN6RMj)s6;HBfWmNB<7bh4Q**cHtf*}tmf2mEW^Y*BVdM>;NHas3)Y$nh z#&SgB1nNLitS$-3O{ig3qhA8I#=lG)-KKEtC@4BPg6;N)kjEyBKwY9QVMV-PemVuE z_u@ptJ7X!3)`vPDyTD{9ayBhBC%FI-0^L^8`R8Z3PW0CTBj$(SE;Y4L;m3gd5%L}1 z01bsxA&xbB?eFS4!St#;UL#d@!i_6!Df*PyvFR~?8a6eyU~uJgqB_JBWmU|koH1qF 
z!uLM1cmSS(8MKi4FXR^i75vaVd|v3%3UCC&;A_i%y4y3EcUiLM)itY|`NotP8qOs!c?5L7d_s}>y?blJ}Hgk2Cm{8fH_j=T$cwUhe zpQ2;mcj=t>#1fd(R&aerJj`tk zSW6M>3h=o@tX=@uo$eT@$gP%WviC6Pp6p2p>rbaQWMYTO=K07SK#vpad1W!Z*IL(g z&D$`%GFPA}xUsbnmr2mwGdWGh94U1_x93jX_V`vcN(h+O0{d+4T4#;o;UaoW%mE_q->k|Gfgp_D79$cM`yKB&s_Pm$pSPi7x?R662#PTgtT)mZif)dq7&9`zS6eilIf1 z?1)_?GIUQkejBcCbB(2w03x!fvDIzym85hO^k!Ph^Pz_;?n^^WsRx@Or#XZOXM#GC zE4LU^@F)?X6ETW0N63z#@SCUs1qQ7Ett0P=&tov`kUJd-^X(&-G^%iUb@cRk%H94O z(!fFZmq{ymiV93Hn{!$VN8qf|%IC(n*nTVFiOZ~Bb~qDy%&bp{)%qF}u2wxIDgMqz)|*5|DAjYs-QtK=B1ISs1nG-qL0(UB3Nr|0@0QIP|)j41?si zw@)njyS0U#xu^v%@P5b0#6GbU^tHc%9DSbz)+g|8t41*&KEVY(w&z!|Qp~^khwhvy3Dt*o?NQTkf$1wRU-vHx5xW zMs_+jSyBi#5|;>Gh-l~61ZC|e#XB3q6pg2my{llxC^+Un%X4>*=Ss&FMVniDo?SRA zi>y>lg^En8tdrH`RV6#*&E%m}vC0LK=7ey!f~n>0gV3ZZ3f;nj94fv+Vv=W(Ar#w% z;YO=LzU=Dap;P|9Ub3K{xrIkW$gn?7($i1+kOg*b^r_-(JS98!F=CvDpDCy$b+fQo zxj6LFXomhf{r$=r+s2mRj4+Qx6t7TF@|Lwh3IC~h$I!QvI_fQGh4G!icrxCeA%zo9 z%&Gi)&tK(^Z<;Q=a{cOskK|1cbdfn$~XJWo-p(jzz zRs4mqSFTP$VseU<%O9>6kpFy{2Y;n5fH;I4m**Tr| zuNRv?{&+*JBe=iwYfZDn&|WD-NBQgbp)9{&n72A_B92Xd)s%XR-m}zQ3!izL?Em_T zUhCei^?H}|+kaAje`=jv9u5@hg3Bsa4(wY^$&#nPG1E?Ipl_#1yV^Ls_R!Zqg%r-c zx|Z-vx_J62@9i$b#72XkMPs1(`M?ex8WknpZHn^(wyah$TKzAt7B$aNhDV0AK1L&- z-X34*%UFh>1l%%{*BHJm@Z3^oL^op~nSu|z=ic7lbur-hb%n38@+tWJ`6Ufuh2duc z98zsux{09||HJpfUa0L1n*Knoe{D9jolQdGNUYuzR!CYy)FMcmfc_cm9o*Uluf!Ap z{X|{EBO{vuLggaUEX5!WXJMKgHFCq6?=K^q0J1V8A41Tzh&=*UCgf-8DEZ|(74Bkb zn{&zoIVxHM#YIk{Yv+oX=}$y>+yM65+-qldcy1oI$FQ9vOaBFe1LiA=0wnO zDrh{&D$J;cy!Vl#M}=LdisGn|ZxZ~da-u^zKbj?%S?eVP?GS1TRtnhFD3e8<TxFB~}Nz_anB#80G_gSBk=usCJesJPJx`v zLm)*br8h05K){$5>gB~4Rg+6hCSRX^(36b_i24S0Ot59nHTfB#9niJ;nA!sa-+|RJB_70PhT{r5 z;BSj1h)tz{N53DO`el1#Nd=$-*{JQ7spg249u0*gX<;%{t?ks*6x1gSUV2Eb2xBt! z)PPSAfD|X_?s{(RcU-tmkI3-g)?4VI% zqujB_cjG2fODXQ8;J&_c{-Am78(Vlg>`PJ<5ENj|1I#MoRs6<#hEPbzerI!~BH%7i)Gt7L z0d@(7>TZRYEpTABLVzDL59_R^?Ts3inYu`b9$Sy8^7SM6P zE#S7gjMVTUKh6~1r7UxLNqvsWcQIC+p9BxNX3W>k8oZydjzQH=pk3T&(Z+1Xw3`#F z+MUyl&v1Ch?ycyp6mAZw-G+mOMBS#rPXddFkXgo!91wd-`ufih*R? 
z>yw{8e~^6YVHTz?9lH6gQSu(&zHEGA#&*op6Slj%dxSVPKnpfa3!|k-V>ePH6+8u} zqF9T@0@itBs*{pa>BSD5k_XS9+>Mr1(y7j3I1}DJNopxtT3T5GAhVn!TlSwO<}>zP zqoZV%wf?aniJtfc=??W&nA>Q*xJZnbQzRfD*xa0sMmRRwI2=GE%-=hXkNstERLV?T zCioHeE+W}rBWTo%Ha>c7Ag+LZ1M7A8?)pt_`jyN_kH~vAzCqt!P2Vq~ognIOR!D&* z;ZESh5`Cg$B@aQL&;IEFyFGZNh^)n?(^%8_8$8#$?sVZw&DM@LJdw6J^S?4dJH7hw zwkj0j!~u(O8E(%qQ0Q^xYKzG?0BI+Z-A-AP^IkXkl6kcvpj~zyzDrCH;Lqx!{06VJ zXBQW4<{6X;AgltIC_lSP3%gO#-GfI2zhtGIL*AnBd;rcoqI9F$TY<4c%SJDTW&To%`4B zS?0sCdA=&ggCWQ-H7R*_XNOsPl#^G6j99KH7rI^BP&1h!<~4F1Y3@H1j@!y7)Zh5T zX|=`CpmeHhcf?aG2ze+6VZQbP1QG(^MW^2T!06)y^R*TYppQ6sdC8znupX*OT;@?| zY#sCls-;R!#HOUABo79r)&oJ0{PzYFCo&%~6T*P;K+WjSMz5q=`~C4RYon5iH3O}p zMs$Y0ho!9B=Uq6;>WNt<(si3XHXnPG2YbiNX!oD4-EVBjQ;ZRLcQN=Fyo%j=;T>)O zkXI)FaN=cxp1LoNguw?q1)(tXDVcj$+?j3WFKT=>V4I z5t#k=y2R&V>HpfwELBE-xjGwlyfZ0d4aNnHhHbBWIJzj=DC!lWS-Nf z>vq689G3JStudKc2+g$?S$3$kX#Dt`aR9BwSHX8Y8ee{m`Dqf^8N?mOfM1-mi?A}{ zEzKrGh@QaxKi!*G-rwIJ;gTSzPvD$p>zAC@e{XI2uc}F6y$_CJX=xItknI%=c!17I zx}Z(1VP4C)U7zA>!EwV&Y6G*l;kn&|ys6uv+$$pVN>U+Q=&J!NBk8wOj_hCMCrE!o zh0+tIa%fjysE27l&X*KKQA|&r6q0!a@TyuW0U5AfX5Ha%?Y?lq z=)E;H$nUNfQyIZ){*GDq=<(y-EGDKCeaPIamDL;D%OfgE3ok{@iMY_G(%ISB@K#oY z%T*sbUH%=}b*X@9+g z+%bf2s|I{qE4lFZSX$Wb&y)si`a&f($E)<>U1+sNzGM5E4+7vts!#rg3A&wO_ksr4 zDQO7I;LTe_^4};_Y~C$syz~ogwNW}1uRd7|Y3Yw^a2}Acky--;&QTy6i@Lf!-vjAt z6E=PI)DH8~8R*2oQBIMPkeGe@81u%UEH9geC>2jx?j*JFn4hvdD!jw{#Z+l$sK*1> zoq{cc@eE~w0NDb`le0#$HNqF-0$%0uk;mHrjL@NC>!jiM##uxEpPl73bbrT zevojeUOYKsqRy*YG533qLE7S+e%W37z&B)GyC|L=+08x1RU?>+@ED#|z}yxbt{8#z z+R3tUafW`qbWg=}Bdufo!_NuIGW_=qG;4f3@mtFn1ZO_xHP278fZeE@+qr&jp*FJH z>X1mnbd-I}($xTO4r2Vq#-&3$a<}aKYS8|x8%NJM)l`LdqCez)rCe6HTUiwmo$p#R zF>Npb1=HZVx)6pRXe}iYTu(vzYBAE=i4X&IZ)j%oDTN`~yXLrJhn$T@{+G{jRg2UB+O21m>OT)^YqRwQg9$Oi#=|#AH#xN$JPyI5QAFyc$`1u%CM89 zUpgRILh(CK#$|p;ATPnTNprDBvO}k=h!&vH<`RfRU#_BfG^%%t+U>~)&gF?gZA2e$08s9R5#dh@J|9f(qr`#X5`l^lbfGCG93&aslSUi{gI&U3_};mSuYYI zcQ~{}gs^%YffR-ql7DJt?w=|=56)N#lsu7cy5}rovA7FW7uQ~!3~TvokNC76tm)imCZ0h@3E&MRdnIb4zQPR??BiC 
zA)I&mlp50u-g(p^P=)Q)pdXj0)hIWrH1`!T`F&t~d;K5Aw-AEl|2rlewfhff3w>Kf zQF_PdU(L0fD=HoeqL!;L!z4xTUdo#&J zf6zwe#16e#oQnM!WdF`Q7ema}>qu|UA~DJ~P>&&JlM!39YB;M_gZk`f>x>^grt!>i=kn_CsXk9L@x#?995UEaXtm6Z+({evZIT9vZ<&Mvm> zuX&2S_Vo?g^}()ZVq^gSK^@>^FTodw1@kKN;4IxOw~^?#3b9um_m^~K69eCk+(#{c zdwTGh8Yh=6%RF!=@C7PMM029~-B$5V*d3kWBK%`>>auL>l*bAnS+G|8P@<5Y3gA1rdc4P*xcXoti}l>DPeZvN5lB=Bg`Dmpb+OO?_vzhu{2cXZg&~VyN7S zdC3;Z^tu9^-KL&~*UsQ==Xv`wcCoL*9St+k0yT^1AOLAElzOpt(odlXpMjLg^NclP zTj4b7(3$B{nHb|a)2{GozJ2k?G4yGjlkq!6hsA7rm0*sVYk!6t&qXAflFruM!|2gW z>5222d5^Le5PELAoh1kP*bF_Zu?By#{SP}Ea1Kmashw97riu)@xp?FaFi)efut ziRAk#i%gH^dR{~GrYF=;pT%Uz2z`nV%G|lb?)PvfZE`{|dRR8?mv`!T5Vq34vYfdz zj)Ct6Q$oc_Vz)jQSYfl&HX^D(Gn)*I(NtDML^z0=uO!8QCF zN-6~4lGEwdazXmeBfKL{YRABVXvYDgVp0leTtNpJa>$NkqXcSK++^|VlF`txQ)wc6 zdF)Gu5{PVan2E0idva-#_EVmAQ1LDmwXEB`hCAwGw4T3Q{!vj%V{%@4TO9#e_<21r z_bpOy3RqKN^*m$syVfK*oZvc$K^5lVR+9Y`aJr8aA$FT4YX|T;%AAa`@521|8SB) zxZYQl+~H2K0~cby{6S}%Qb6udoGk9x+?=|79>xx!^563Yd!qBksQWb1pbAU#yHV5E zzx_b};?_XmTxk!q7^F+fl2oR0a5EdOkg%d*5ge@Oy}q8={Z$&05g$6QQM~xe^Iw%p zQR+|3ea!oqIGsGp@;rW~y64)9TAgyPc*Rw|1xYQ@RLZW?x{fBF761`tE`Xkp$|@87 zmk1g?I(o?5fCxcX$TXTHN*Wv@9JZu)%u5Ub*A1;G6lQts4uz9UKA7ijNzQ5-_G(J6b3KHp28exs=8BCuuT}Ce& z$2RyycB+(4w+-f-{h&zayqT2T95f(cV$8W+35JID_#FVPv;Jdr2p(}%7rGIEe#kHLWOn$%g*&J&~iF2Jb6b%Z3w)h=I*4!5`u{+ z(lUzjlkPs{Byl$ViVn=r&3(K_e&QycT;~^_&2@eyUS1)2PgF8`t(wBgc^RrMCP5cm z_d0tQH}OF3_aXftt=srdm6w<- zGNsTbpLE7Ye&xMA)i>{DGK*!j6ffVhutWWlS%H!c3HxB}$x8Uk`|TKpYdfL#9}ZsF zy2HTN`rNgYi=hsC->Mzb^8660ZPrgf*zhH`zC^j~4ocvce98BMIoQgfvE?y*r@qHBF3{9az!+Nx`1m2A>(gXg0 z-bmzpT!7O^%fLpapO!NzJai*)u#&@?x~t6$d)e)#bhr#j(Hq^1sR&F;y~2@(?!Kae zCiz82BADPt9=a@^riS;W4cVBwX*fnUu7W;$>1J;CN3Z4mvGZ6vsM4axiEA^{zTUX# zBbeo}t@TKrCg#Zl`g^|!vj9+B-&-N*{5UX(i2P&xli)A{k8M4xlbR$_s5gV2iuC<+ z(5;63X|u&_`8W<9gm#&O5e>rugO)&DgZ@q^GKiKOivj&-+hPFq*eoxPZ%9h}c_&** z$JdAX(GPDn;|Kheq;d2-yD8hA&hrN+`<1i6=~2p5nzY|RdWd56A`^FW;I{i?AYyns*6}j)EDBYjWp9GQn!9#*pI@L22X; z;|N2edgXG@ofA?lBMu(iOlUi^Sqcq$w3$1}GEAn*$pl|UQbt~mfCr<$i8tBvjr?fi 
zvHnB_vB(1e;2XJvfW7-Ux|p4m?TWv3==jI*_|IYWZ^1yw(gwMiP)iY0hOWdnL&*E( z(FZfn2S9a(iXw^A;^MdHN#dOh{o~H-T!dbQ&gb0MEG*9aUjLcwJ*+?0z6-4ub+Eas zBjQ$Ojtzqm8`03{p{liWd~EZDC3z9ceI*XJkH{gr1OAP>Mt@Izu7OW8 z{6mRtzZtR-x}$B-vuMiM1DeuD|LF+^K`I|`A86$nN}>2sC>mr410BU`MydjoUJpY; zreWrx2y?TfmWhLG>^hCJZ+D_F5i;+GQ3w>s+^_%0yPns;sUT03ymsywNfHa9(-S`n z3t5p(A7=BMBGh4{5wGj#v3n7imF_@I?Xrmbqgwuf!e;84bNYc~ulwFQ<{*d-?asHH zoP-dDvj~9}%+AE^2JVAqqm8R$^tYnQdFrF6sK_90#{mDq`iEgHs4)!(!al-&gUN{X zfx`7l2bxH-lNdp;CwsZNQS!pZLMQrgLU_ufq~x~Iq-0dC?-?#R#_YM)>*ww=w#KBz z1or08yiR^%+F9m@9_VqI-%2x&TgBciPVwm|z-;j>ujo*{KC-q=@ zEPYGDaYU3;4YHfp5;%9P^L(nMO>lE($n2D)-gptN7!bQe2es z;kT%SJ%ptSKgkp*5s4y}#~_Q8?MDWyMntKT7-Va=EYMN`NiR{h(v@T0MUFw# z9F8fkfPesmf$%v?ok<(l{16I-So(7G%;Av~___k^CFzj6*|06>@90A-2dZq`Hv%2x zf!f?GWG5-9Wpta@(v|Ns*O>B?72W3f(~r)*@>0pgYSu|WMkI&OMyX#Js_h}OJElvlL30#0n>*s1u45*Hz@O2jP)qLHs(1qa5^PtYbdFr<^Ey_lCpSIe>*{=Y!~%dH8@jOe>Qr z?J7cX)7@m|+3&Mj*rFUa8PM;Akc)vT=`zi*g2)t;T<87@VEzhTF(A6fwbh zfetI0H(y>}P9q?yuC6YotV+6wgV6KvbPy>Iq&%j~C7 z99&F6F}D{DBDrxM?;mbixAP&CiYU1vKVfxZ78J`VfRHu8)bc(!HK~y%Hus|jmG^l1@{DW= zBa_8!XFc5fkh!E{=TbF3s-jpBC4maUBc)hP`ch-~`gg zjNqVAS8GA(jus1No$lJFjjj}Wl$tuEGVE?bXfhFs52PYfX&&dlWLb!j(0N}bAmEzALtZh zkiPfE918s|Pr|u02qDjpuvUqhUHW}cLd24_jO+zuB^PZHC#;%loVK(!UeOYco)l?hcW{j**wfguMkeARamQi6Syo-n7D^TTR8K zfn7-_C&`g@UtAt7(U9@BVHUTP_$&zjgvQ*Y_r1W`JOAm5nEDh)Q9^}W!5QctMB}qajo}IEl2YiU&$}3Mr>JD{F z?HBulqf>{0?ADWm~#otIK{duvN7%ZsFJ6bwmIqweEkgH@`>{7A8& zCIIr4=j6}h&c|6bJcn)3aod8>JFs=A%yQH@XLZVxBFLmS9j7OIyu$z6?&kN# zYjQSG=BrokVfj;}3~*C%ukua$?rS_oQir>z45An6BJgANP4a9Uy$Xj1`!XcWme>Es zjvkJ6mcspKxE(uf@W9mpdadx^=r!m#fs0}B(DSYCqZ}be z{?sqd+h(Pv>qe-8e$UB5+Ai2R)ip(;Uo2!yK@()>GI#6CrSfUY)?$sPfDZpHlS1(6 zk+6v|ajchpGNs%D9LbTu2rozuqGeBlg<{;Mkgg39eLAVvaxeJ8c=t&qZi8EG0 zy7q1>voq;>w#%ahxWR~yb5F6#wmy^hm8N8tG?BLj_T(^vYwJ|Lyt6kA-9Aq%fNfMA zmHHKXECC=(;A=&GnvVvRG8ke_ZkxUCgDnDadL~uI67H!Ur<{@!Jc$-MPh=u>WT4E^ z7!^rxh7wx$-``HpzSdU1RsmN4wG>I}u&b@qRJSPo*tJ-B_FiaR=`Kx*NQBqk4-XYC 
zqM&5|-*enZzuhLLQF^DuGN`q^MSZ@|)Vv-)q9?xB+;8N#+jRp{`&!ufJ)LFQvbavB z0-H59G)w-S4?~50`z%4pR$VD!Gqt-9*Zg<&mR&|~x|D@=TAGaNT4t<0<3j=~gVw&K zytj7);A}T`*#Amf(}fNf#hFj^~Eq26k=*cEf?nk=6 zzIK^~X^Q2Os!_1q&IXPlRe;C|5g>XG;c;v|4d}eCmRVOthSJgmdJ)XdO#^8MA6l|8 z1ijV>uJ7^TuMr@s_Yfv5)7U$$0JAS#5I_tHSmX*}<3!DK)sKu9ffeU@()kt=LZPX{&m1`siHda~uK^ z(4J~h{T4kNOtW%4I?esS7KVOt}ZWV(7bLO?}ijm0e!;W`%R=$NL zO5(ZR+lP&?zem>e?&;I(vOZm^Z0hPwU770EANRytS9_ZO zhyh`P=F2k_#^*4_9<`djeC^=i#(&>;~YLD-s!lPjpJEEqYB zB=8Me!fn+O;B&}IP|8#@vl;9G^MUgVj)hlK^n6kSI;+oZK>nkiGg{V+OJa8ioL>}L#1%}*G`xUCg7=d5|JFBSFBfL<=qQVjB?i$aUPC7 zM;~x(rkzt-x}B3tR)m?Iz5b2ni;1npb96Z;SgE553N)2e=ysTI)7PB6X6=#aIeLuM zYxMgsz8UHhx8B?LwDyz6d>%R5PFL+C@w8ys2~eh|D-EGl(a5DgvgA%fW3D|%(FCsI z)zfGt-E+ngPiWhOs+gP=f8Hfd8v}l`}R}}Te?`lQ-~m7H>cHZJ;xZX$l}~^ z*%8S~O;zvn>DdhF;k+AK)TIDMw=gi3O-)4z%3GSCR?PhGEXdjPo})S3viAg%!bzUh z+k-Hb-d_`FA%AEHl?oNZ!Zd6J`Cz%q%+W?1i5|@PXn%Mwt8;1|_#sr~NlMaO`}EEx zmtcehCYr6ZP?aieer^nru|79P!tP(5K7`WOHL5 zWLTkgHExV>md*>P@0y4|@R@)~qf?50aAWX|h`YCZXs` z;AdxN_W*D*wvJhNo{DrnaVEK3ZrN=ZYl8S0rzm3rsI3`{x_{>`<6gk~_SQ|;M~hpc zfp&^W&8nSkg-rQae=9w)yyWgp{H9aaetxY&vk#kXW#~7jMVuY?yQR$4Bb1clyOuwP z%T&OkLdD9um>6peL_X)w%JeV5w|)g;J^`QEe3l6=KVk?#dK8dw?caAt9;RM1*ixfm z$p%))6(Yy}P$&g5?8I8!s!ma?fNS6+jH@LG@|ws|5+hDT;G2az()zN1Ja)kc;ciP8 zf6mn{B!oh->TKYSJMe-Z4a#NCGCUe7|Dwk*boWrA!G{8$oYut$e|&y68gQ%E_@#6| zLynh5xX;b+`}aG{s+X5-39D>yPI^3A+t?;c@s~6eoXXuYyk28@ zW=Yxb3;VmD-#s`vrq;A^Dsnt6*ZSfMb@g>Gq$P8q?>^E5ad5O$~T!u(S1@3!uv?^W-2J~OVXK5aWl;0HSnI|s*gwG*28cjRb3$-)YK#HPRs zq7F*2>-7^DQ-aOnC7f80Zm%taJGxz&MDWVI2p-qzB=}IcpczhCsfoYTaXbokJx0X0 zbr8h_iG`1kj~tq{Uu|6lu74|_4%CJf0ddIUG8P}F*ezz$D^%ZAZOrWd|iC zWlK{SO{cxsmPA`yTRF^igfO$%k#2Eb-VWNmuDd%vBP$ilQK-dDHQki?FDy&R3ZpAy z6|yzPe(PzhhBGEcaFKQ){mHO{wL{9)QLZ%-B+a$If4l>~A_vDW$9Cd`OFgPqdbM=& zfnTJjRo4De{4B^ow9#;mD)G2@tMu#Q`R}Ic-SYo-T&Lr~_KHwvkAa((b+&cb`Jw!agd-b~(-WzGonZ!{+wFb$x zWB8<|mX>F=(@V(U8h8Ak|3Z$r4&h_Q&yI`;eK}G*Xnd6q{a;l*g>i z_h&%2*lb@Cyo1EL`K|So`3+geB@}3G3nr%%D=fiTvU0{3kC|GZ`e7g3k*8buvh5j` 
zybG=nNOihhDGfK80Eb&3|7}vYA+uC%9ykTZ;mn0U+tBj&=`wA{ucmV5H7rrBFBRm6&_|@(4C{l<2O$`bfQCF9_zUhog!v!VpS2No9=3u< zGWHS%GSoaphyqd{NIdH<9x70?X@)8|s8n+ip=?E4T5@p8(J|iTQ9eJ{Hx4|*Sw3vX zh}@@dc{7-bMP_FAnIcBrj(|A$3r~Vs7d=SBeZ2|R@ehy7ZIfMVb6xhIQ#;(~N|lp* ztpVg3#dE>N<~$e?AH9Dss%>>&+_A0FypYD{m%QRRrR1yS?5RnnwC!H(EF4%p*QKcP(#Zm|t zE5u_SPU9LBe#N=b;Nmvv1CDAR(S{Bkfg54xZd3d1)juk~$ETwzLn4W~R*L!p-=fDrrREMi5CE*uCN3nkq4Z> zSh~+A16N~tt$*KYN&ZmAn?CWV+PUBhC%l$SZqw2h-jU&OTL5aMO#J*;+occwUOzu@ zgmMf}^PK~<76D;q$m*|7j0n@E6I2cHPxl3x#jUy)7tn5QZtWLr(7{VUviug+Eq4oagxA(-&Ki+j`$T5lFt0-WD`x6rkxA-R6v z$N-*0G2(ZD>v8pv^PDTfzll#sz>V{Itm-))ZM&G3ixqF?6-2~sq$(0}OAq5J+X@1R{Dm(DeBk4GN0k0#9C z|J`3}2Oj&9HQ;e!_kgTeC0!s5jDU4WZVZ9JYWl}9nOZQGu7Bi=F0VYd^EfGK4t~`I zTTK=ndnWhMk6mMnwC`5^`gERd;BPxTG}^28;1v*bb3L7ML`vMq&8<{sas_gS1t84k zm7Y@??t48SNK7`O6T|8;K~M$l4z>}cNgz{5TvJzHgGY5tVqUe-3!apgkWm@9K=p7N z*S3$YZq`ZOT&&&qqaWhq<;5?RxPizhQw4oyBh|DF33qT{F0bYzmF4|w?SmJ z3Sn_elmmO1!1tVPD-(ZJ^QUZ|aEj@chYkfs9y}pmu%Wtva+r^AQGlm&=fElV6sUO1T5(-!=IIa-u9BaJ$C=&h z(4kKvJdb4MroF3vXZRaD+UTL3(TOX4f4aCcggsz_Y}871e!23;cfgA8vir{u7QdeV zP{9u!nRhaJF3Xic{&xrtNjvP5FUG=4e8(B=t8(9cmAl;7UqXLRJ)$Kmq=klsW#kQo z-1`hERSk_Yb@9Qbwf{ramw-dLhJQ~1YC4;eB{AtAwlHQTK8dNkx==(cOs&qROq-Jc+O3J z{sC{c+oKZ)pyDlj^Jec-C3mWW_Y$nE8{W9;K>>`5XkM`2ib9#>&VD~9e$sSet-H9@ z=%M?Ih4I$Mt^4aFcV+fe34CA9OUdd#XRvDZJgUV(Cu;O`{)pH9l9L%$)Vh%x>aVHg zDE+ct@70Hfd47pI^KwoaIcgMc6bJQXmq`6cux)hZ5BlxYu{a0?R^QLmE$B9Dfa5{h zRk5r_Nr(MXx+1=)Lrb9#)aI)Pjy~hHZ+(68N6yH1Qzx=D=+QE<z@4 zv#*m;D9nMnCX|1d*y+>7U{L%VKP1V?2_r!Kh02lDh!MhO>^yj(6^*@Wr7yG-6s9VS zIJf44{(iF!bJgKua<)d^))G&c(R?2&JZmiTO2v;!P$AnxD^{J&hVqX(qI7CNc1rOp zi)eSLfc8PjadkS{`0}k3xvh0J7@xcEhjXrFtaleRqUe2IUF|Z&aQtNJCqlZMBYk9` z#+#s`;nXwW{ZIv2kwfMaZ`%hd(w0c(LL9m6-Pyw^dR+JU<43fDV)52dEADrvfO%*1 zP||<>y32x2jF_4ERag&H$A?M)fzNZDJyWMaO*!bNu93?73CSIYS&@~Dvf;2O$hYQE zntvGzsa)~WGwm8u%S&5OmIJ$y2gJb9F{koMG@*DiEQo*9@m(IiLqP&LP}Sp%e0{6Y z_%40Pc{2fyUAl<`{B5UZsLI^%?4x*{T-Z-@&2@FHK&G9AykEK8u%a2s=qJ73s0SS) zY1+3>e>c6`PCQ+Zd17&Wr?h9AZriA@JJrZ8r|E9DS)RpY_@^4blfQre?gg}d 
zxW|6{LG)!$&@|2k7i$6yQHl(5$y?K;Duc(6Xu;-B5`Y>AK>Y$Vm~Ldj6#)ZhS{z{) zECJ4jGCR^pOileO{UF2mlK%ZvEJLd)|**Q7IfWY-dNunotJ|v5i;+*TY`w|%| zfPeOTXrL+SefS;pucFBfuACgGi(cX}dj0rY5wCO4SDZZ-Y7 zo-=Q=gT`DV&~OacxZUwWfz3@#B;dX3yVRoz>yZ2+Atlu!4EMf{n37dz5XI+JBO`H= zUr^{C`u4iAG50osCO5ISGbj!Kgiq=ibP?1w%>XXE7t3KeM13S5*uL#|n(Rs^eMlM}1$$Wtos^NK$A9yjEu6WR6m zXm1s6FAhIHAZy|g7z+vDVnJ>_ z7UGO^5l(7)5o9+OWBGZ6@pX7!?HiI)cw?aPU!{mMbBN%&8&rltZF1GMWym;NK~N*& zr~CVA44t=V-ESS#sNIhZnBAV1mNvZvds~0AGRx7^_2!!OUQ`nH7%T|3ANIfzS^mxL z&VJ|?Gh{vdqpox81Xu$Zag|N(%z47Tc}qptCQCj`G#*C2Ko-hIA8>PKXpZr9*5C#O zW=NQ!=b|^a_Vx84FY+U&;atJKiwbdfBWW!aA+x^g-<$Hqm8(A96bnHMZHrc|)uq1( z!1Cq1OOGUoBwTb$S4n-6+wazE6}(I;aiCRs<#v)F+kmR2z_{c6$R>4s8Shx=Yj^Xj zVfglivdd@5e{+PaSQDJ}Ki<2dvnDcfP4 z0JQGZ6G5CazU`x9P!ikRaTLl(rQ>{cse|%LY5fBgON{*9x%4Zb-n+uEKf zw+1TgrAjT;^7(cRBE(lL4h0KSc3(Sa#c|NtpMFXR)mj33uD@)0Qc^9NA_8w-7xd+H z$&Wjc%QK3sYX)_DMLZyaxw$_-NZMwrisk3J`^$@n)U%(yGS_pq;ov<)OA6Q39-tuf zIXlN}i6*iMrK-lK&u3s{@EgwMBqK-n7kJHyTdg~-GH9n|22cIjx8hth%IxCPO-(A0 z<+F2qkaYdtCpM7>?yr9WAHHT13Ofg+F%GYqP!}P^gYsHLW7R}6yt{I$@Hq+0e!`Eg z4`10@EE7QcE=P4dUkT&)81Qu^aOi$5>Akx{Lrbd!-qjv%6sB^U9Gj}(Z8Nhe*_|OF z#cBfZBnpk8^5r>sGlTQsEHAH!iVg}2ii(IBp48LRE4(RWWExWgLOTrX1Gz*VA^A?^ z?Gbnw1!kmP80#}hFlK|@Tv9~2ek*>8xz_!LL9?gFQ#x=Iy1mAW8?*L~3dgGUI&-fn z$J&dZGPSF>4l*#Sus?Om!Y*y3-oVVbHHrAOvgVMXx$Y!RKhIA%vM^+i(?Qx_aXLwV z?}P9i(=K8{8|~jRH%;=*TWqXEV=E@ok)3U#Jx#LTESi9&c~G%y2aakU(e|N{(3zWy zMMg$Orc0m!&W#8_>g9xovo{14ALI-E-xbJr>^lefLTM~A%q~NyFJlNOH-pUvh06B; z36CT6gswxkb(t0NKV92zh`OX~>yG8N1T$M-$)K;d-}NTz;;z^j$tJPM&}k^dijV+? 
zu1%=g7;HaYP2pbWGlPk>oXV4pUril*w5z&qiajSEusRo;+(hDZ5t z620xBv+8TM2uv1~o(jL79y8-o@pkZ`ZgG;8XTSJAeY8ThG-%W5Yey5 z6oHcjffd&dAXj*JuRF>Wq%$5+s~E$%(;x5+%&UD!#mZfQ`p7RR=n#C+^(h)C;@n)X zX;6aYBfhtweOHc*0U!JgA~MkE&Iq9xNiq5FDFfj+HFVVmc`O3sw7(-?$-O>EtZp@K z#d{%+F!@LG9v{V*Q8r-2)U6(>Q9u*@6rmum*@n?nzF^;&q|10@JYu1hP>k5OB_PK zOG~g6{`m8@VHW49cb`i*!qigt4$dYGs6H&4-1$i0qa1(@lUq<4^Aah{;#&)0ALtd& ztGbIcpGuoh66iOjkM?#Gj}Tvwp*lyx8T__E-%U#>epL?bDn(On;5QH2ufp+z@^kqJ zulJ#{v%w|Y^5RUeJCy)rP~9bz-@zTT!K&Fnx}GNC_LsEV3;phpF1Mn#%MM`;&~8mK z({4Qg#rT~(dP9lt#lEw3Te&Ho4L{M5q#531ydF)`rdypYa9M9XU@!i|6nFNlOVpd!1}r14@UXKAg#LXj_jZm& zfQl+Lq>15(1P93;FwCu4C-`CRW5xnK^x_`;dPW27P(F1m4tQGlVSZ<3 zya5KGv8m>jfw8fbw&GkS5zk~_V-N%(-Gh=|BE2_&6V;XR6++kis) zh?D|befp9nUPH0m-7!qh;obVUI$8$jvoT)#oV$>I?wd()pIt_L|FcZZv>xKZkp-P@ zu=n_3{*15n`)#bDr7leJl|i%TyCX)SNzlaH?&48s((q&@dX^TH!sW42_@7QoN{ht> z!HJljfw_FXpkdBjnwa;kTai8!#B6ilI%SVAWV(NEMVLyARjX^3-IIM)R{;XP?q^ttNcRZRjqm4`m1Z>_~a_Xtq5g&Z`M`W0q z`y@=?jNvL5xLUlA>C?_(`|8Y|KMyB0D=7qG6I$o07bxDc6OT{f*6vj@O6E*=bFVx^ zyMI$({LQP~%iy8ekHtL*<1=bnEtM=jEq+?OSY3R~WUx3F*HrZEH*IHg#g)NSf#+`?GWn z`r`0ybYTXO=sQ3u$PjUla#Htc2=9j;Ue=RaD@+;o` zsifdV>h@b6hSW0!~B=>yq$3zO~MkuI2ocqQ9hhOB*@TuXXyXG=Qejv0A`g+|DrF!qr=dR72VdlSkJBaN7Wm~CLZ;T+eC~A3x%0$ zeK?iyC=o#B6X9i|WI#i2*oRcjNLLvutU_}#mXq(E1x0^F%^0*7?n2cQe@B1;{5i;$ zf;}K~Oo5(LWV9aqvRjP?1LaH$?>~#}R@KPxT7k`-biVn*r$vf`RO#vMauwq!e*;wo zZv-c#Xf%uM)Xv`zM+4^F-dm0zE-z5nM9;C~OgYKs?CJQQ#vg4?NlAGsg^Y}>3AO6} zG*;jYuxx!Ouub;)BbL0*p>4`1{gW${HFdl64esdJ*9UneOR z3@g^ei&G2@RrxA%MtgkZ$r`y$`@*1LUYVEfT%fY&&_kN55|i*_FNS|_^(wB-y>jxC zpoDCem9rZbI(a9_ZD}mkgw@ck&k4)q@nKU$bKtGVQHP%!<=)NZvE2jXHE75&W zGj1=DQCrE$E-qd>q?(GK?EHr3TmlHT7seZnYTfDjb_wO*9gq+k!0(?B+k-ZxmwN$6 zJUykx)x)V6BTT^Ie8k__L=8v8GZ!>n3h3O(xS@=e)0Xh_|}A znV2|xn{+sIEe76%ZD%d{>dNJaXBG;Sd*GA?e45Vk#2P*2ARfoO4zk2YL9KW;m$D9GT5Jf}_zO~DblN>J&# z*9kqtM0t5+Th)0OQ7bqs5zTYq&ta;~?-Bz8%4haD`H+yXMP;Y`i$5Z?d%eaI2akH= zd`HVO7+o#QQtT6~l?aXE@l`*j>F+ELi@~b*=q*?H%l0X$&XMxDFRHS*c2UV!(248p zgT{fbw%2(T{f>3?^JL~KMoOlP+gWVsl}PnIw5OsqPyf6hz29x_^Eck9P^m+O 
zTpV2UCDnDUv87A2hs)m%Hk!P#WyN1;XAe`8_(Qw(aE^m1P}CNT8)-V4?B|r4Qrv!` zO1eW&fmGzjDEc55WXQNEy;oZM&YL@wnmnJ@^`$?R^Zf@meVi;eKJjC%7Z>XhOZHxM z%bq__DJw1Y!+0hb%~}^)Y~L-_>UO?POjNYtYS_Vv+`I3wlU%aBwNA!nr(xI4*@$&t|XQOrm1e-fA zgfYlJ>YQY%Kr;7c$UJI-w@LNwI4b>e_6`Bb;+pjp@l175RZM>sRL^R_NEV zd-F^cuw8;PDeBJ;U@a0;{uB`8y2~JBwRPMl#zeXq&`mj?M@ndE962#&dZ9rmGh34* z0+8}C^T%4By&y=j|HhdByS`p}`x$m2fb^YA2UF}IG+9rsiA_oC{X(m9P;Ux;$hw-TS^GZM=d7sT@DyO7xogO{B2XA(NI zj{d0%I8+#WG;S`?*PbS5Jt$bCof$G3eEkQnObhjaX=cb$4o#3c@3pz<+D)?%uIu!% zwUTLO#-kXY+ujrgs~_|1?6X|%z=%i20oGFf-4cI{dlnBQ-%6U8UpDTry)LvP3#!$v z-65zXcD9q@w*9D?MQl{XOITR}?~bTA!k&o-3OxOL0DCioYLj(1qe zla!s`csH>{lYN=ArJ(!fa3*^le?_D(kSVSOj97^lfnqIR1@ zdsRw84*sr$iSR;&E-~T8FopWmhr3APnlSH5;nLZXuD8dCJtwGchj*iP@pB$?YWF#& z;LVEA%m~gAU$|9y((f5sZbrqx#CUR2^+NsU&t+6qsW-`cg!r(b>zKcjl=)$jI&nfd zprosqUVyUSvN@X5da_K)|L1MdO%-Cq+?Pc0du*r}m6Cn1PVH&AU>__?GJk>~5!*!A zH+^pm$vNX$Zg?*C4rf(ERI~tnO4%>xmB2XHaicUGkJ19QyA;zOu;=?suzi2P%;eGv zgMNE+o`HUvKx_)%0aiYgU%&qCQ9KEGAyjnJ-f3NvwFbn4=f& ziA1Bt{l-}X31fB*iAv{p5~h9m5*^}=a{{L3ZJf-R=Vi z;YSy~7U+bH;%^b6mGT_W5ugn6QVQ#4H4II1>W$(yc(J=MZw$##pM@2C{(E2J>B7zaED>edHu3FTPqN3)KAXA0FCfUo(nY&oyr=p6BJUB1 z_&IQbz+g<0M9YWfX*)Yx$IKEv8EiilIJSkpI(ChM$n)S#uj1Rgx6C{aIh~M^$){CR z25SURNf)ggpl5vw!UGBvxud}687UErx3tFD?ItO+V;njiIzZQe1rj+o*PIH1AZv(? zSrOw5AV|TOyjO+xLNIoW{>d9)~1BFGb!;K(=r}i4I2fCQEN6EtQiRfW_-HnRMhKnC0e#zS-9crU$?7R zX@ud-o~YI~LBHT#8(7uG-M#4XA~=a(t=*+jFq%OEr=gzS*YonN5TwtksH90Q;Y^&@6e)F<~c=y&h;HV1Thsl4x zsiCpv4fnQ~dxS62z%I?egjQT=-6JtoGO+F-bdO-1_xV!yF<*aD_LO~minl2<^A z4&jzl5AD2FQraDSQL{PObM~;{%c=mb@iU*lXEzE8`4yegH$Qa`oiEcy@V!95j zOop6v?=y-Gk(O1|Ni7%(*sO}614QMexSY*_QOdTH17Q zfRWjO?1b-DV4lfTAg@+_O3G2cZOBETWHxgsbXDF4UTwDsW z*_!r~!`M&2T+{C*dn#KhFsoxj`|fR=h;+FZmOso52Ih*N#;noqJVsHha?$g! 
zsvn0gGSB9%hx!mYGCYP}MN{67DNRq>*Zg^^!AN!!COP4jzbKc_oL5--vjba-zA7ib zKB{+>Dcc>UGI_tTsj)~*;EKA#uJQ7V^8g~fy4upD`WO6h&0&hO*I58cptYM3Sj7~D zYGOxFrgVt zAqQZO4=?`oXPZLsNftWV3Sj9{29)d`c$6wNO(Td&?x8I%j9E~NThjXkZr9{XDQB$S zekW@kqxnv{etMM|zy9WmM*0H5hrgJXx^NstH2ryeYkcRfGCf}Z(d4SCeoju#oZ{k6 zpn|pu{Ci?_oc4mCui*3NEC2JnXuik(sQp%$yY$ItiS!T3<&NTq}?cl?icJoZ&N1UdT{*kx=k#0 zl;Fm#^ELyQXC332=skT=fB=*UhOrmN54Vket!zK08pI(}uyGb6AQhzB-$NqJM6K9V z-vX=dzl{LaIR>Bs3l9IDxWI(EX8FTK@U>+GW8-%N59gW|fn_Zxs1E3y+J%!(8qBj$ zsT_@Q=vNf4*!#~fkI*c+Ni8tI2slAsH#|H6-IwW?!S*%niF^n{UELwvLb)g`gA_t; zlYvVCWSG?lJFtC5oNI^>h(>a+*`ic2E`gg-b*J-P#2}K881FUu?mt4%|IhVXw{jTU zwT_GuODNyB?%elta#F%`s4l4F;qokng=)HV^JWNKgb!Fa&lvV7^9xx*Tz5*g0}Wkp zSDm3>2-jwEj`-YREq&T0%`!U^CToa~_5qhKcT&TRzjwAuebI2S|0?x-;YNN=ok&!{ zukfo>0WWLyOHU-etrrblDD*4+F%=YeVUw_u99vVgas)!+DuME45%SO|FtY+e1!yi- zV?ZX*81RM!ZiRch?U<0G0+4iQZMBrg-{AI6d;VMGqnDJ_LO~u;(q|)RX!zh4vX#SK z&2I{^0j=;yq3enZoww0w@&R8UjBrN&qqYn4IhMPiV%FFb=IrL^9=C9vX{)5LP?{iQ zD1pB1ZQ8InKs0RF#6@x6IAphWnv9mCw-wAZEd9CJ@MgMOf~@_1c>Faj0xw;QJQC~f zbqm`?<9;qG!~qf>6&vdS3=iqiqs<$KQ2v$(4QEx{c2%&Pk5-%Z`QInT;Pzs=MPxl` zUbf;A5_k?nQO%h9AO(Kx$CI?&y1Ku&C4~wBV7*8vp`oD>*s3m0?ZbwaB@0^Fnl7_EX_(`eo*^TxP;t`$!ZBHBmW6wH zFL>_<<=w}BK7k4KR7j+80%|!8lo2RtLwT2M-1u%)`rh7Y{`jixNz2c}!%Lwisem0u zsQU%#a9}5IE0F%5(ysqoEr}@ZLR>*TzsKj@*UBmgVJwiCGk`?RBfCqsn!)zo2Q=O| zL3Kwb5Z`wYhj0JaxGg_!|JMA4F)% ze&A9NuiAhphh8{Y=#zLC5lLV*Bu$t)c`r?RBtec8ycrX^7snm`*gnis`flcBW z!IcIl3jrJ(&x#}{daBST%K?NW_3a{=J)Ke;fC;1 z5Yw`jP^cU(SoVAvc>ed^Kf>op25fZ=?d|RT%>UM19n2~S($Kt>>%i|xxV?*s8>`q{rNaP`G#6XrNFISIhRJx^DwfF>uM0fk9L*D3ncGTU$2}juf^c zdCdDmnVZQv6F;#wHtBr){1x)>1^)W%-DzrqxZILo?U4p%qsW_=W?gS!V(wTU$whVF zXfHe}o_CV8hgnk4ko`vgO&LpO-?gBnzq5OmR#i<6OJQXFjjgKmy%?3@Zl*lpRSSqGZ6s6d_`PM1{|Qy`I^#p(eAgQ$4K{ zX3DVu|AkJ_ALbsBc6v4(Wf`V$@nYY?og!q!^5;G&WjT zcn$z${X6)%f|nwFQgDKOCffWPRwV)UGxRxj82>D5#Aa zn$@SclJ$(@#@6CL^ZBcm8H;WYwy)vv|JrtA2k4DT5UgFYX4RGI(jbHdE$))(CX_>6 z_hwYiv4zuk_ybb|gQ$P3pQ>rf<3>&!`&dh$6)#CV7m)H8Mwd9gv%Y^{nvj^zA8I<> 
z1gw*YMilbDQ3wGxGd429zJ||U1+fV(M#?BZscmuL6oDLbg0PnSb`)M@oWi-i9qP=n z5I*ro>%LjzGU8WuXR&2hD2@TCav!9GdD(Q;X-MQ7mF(FLi&1-8Sy}n_&}i@Y1$x_u znuS}!mP!h_CKc%mCCx$vqZV#1D#0u5KQ&g<|EjU#w;KIB1&HAbH-*Y>n&x$G>b^({ zX0iNBa)Cno*cFB{_bLafD`eOHQYSY4 z{{0UzDDmUN88CCjP$p6ghmLO=P@<~AtF#_K2$vsub9?H2xUwsrq0_#}8-;0%`NS=Y zU3C!kk7{hX^utRHzPt6=Gj zqsYxUjhr?MZE*u3YmqeesM6D)W(`g|>U~33_kO==Xm2$ZF73LRBd(L>2&}oz^C-gu zPA9(*N5a44QIU(?V(xtpOTS&rl|DhF>S)b

$_p;*uY#=x?SU4kgKu#!2>Z6~}Qu zuMFyRxXZ)4^^e)aW1nvfZmcDuUF!J|wF|0P#^hQQ$cj97B;ng{)QeH7owQglHU%aw z1_!OAoptE)Fqtb+@8uZ_b&kjE_Tau#;SU|qEUp3y z_lecGu1^`Eg-RttHlF<(FZuYocJhXnW*%->xZaklmZASeJd)4a;BBm(9eM?(aNTo@ z+OTGH5R7g)Hfb?XB)(CJ(zkCfXxzM!?UVE5TEkl*CEN=i`3W{}kYZwi=+3osJc^c_ zfYb=my|9a4g0$lwXk1cOccn$Hu08~J zL<|Eu@7?i|DflKH(JF^nMrQ*h?YOCaV7jLJonnb^Mm7S6{2nhbRch0<6Q>Es)f+Qf zsBjWuy~CkwubD0252SF0uGcejZ<*4 z7prk!{Oq(n7qlm3k9#Ly$-SL7$C%z2Os?2@H$U>to3frmA&hi{*oFYCscNULIrlSm z75DGoFI#YDCp^9}-0jfgV8GM-G1Z06y27E@+2NW$@sGY6SnBF8cSB1x4yD#efg1+P zwnXn~@v&Hk3J2%q3ILSs^M#tIn;nZ#QkA}q@NKUc>sQ1# zkL8(r$YHmA(l=M9!}b>hEfm`(cvH;R=as31zoQ0st&B=@rDHE*tJjp>o`ouSzL7h8 zYtA>QFPeAM2xk+(aI5Q{G;MR*CbVYVHA0hJa!@h4bE}wv|9S>N*9%Ury;1K|DpkVr z&{nIc!EPXe{!7fxVBVGpKcSUindh^fggxW#AL*5M;dj;2X!f%jV}mE9@%Z*`ngVb2 z(rAR7@4W9>Dm`D~(=6?}x?P&)!x;_@#v4b4@PW?QFEjw~n|LEd^cNGeg^M?oRuW7% zdLfcz)d01Phd8DJ6X^c6dwsTvSZFR22MgDWaZ1Y_-%41tN~$H3MW161&Ximd+$ttu zb2@)2?zCX_geraIbg<0)3>lX$HOOp=dig|PE zgJ)?iQy1N^rMknXMNZYH`^t7b*?H(!sjLb?r8vwP$Y?4UUKv0Tdr}!gJCl&Y6A_go zQWbA5sO2-Th2Kv2ikqBc6m^e1gt&1;-qQ!ZuT}oTW$uo5=3*A-Fvf+@t1U|mW%!sF zqVbUfK;XhQ(bET}_O+-I4VD8^vqpdDV5L6P$;ZTwB7AmHvwWjZ;$i9Hu!YfBCq#wS zU%b^szNv$9;^Od&^Zz76jjx@d(W$JGCo$#!BbKQg>(d zWY0*N6n~Xq?F|dLFLi#!8?Mx|RLu%XG7q}pt5hLR6ONA6?4@I`r6o>)-_Lux+_v?x zC;c+BtuAN>`auDRU_qkL5|JM1;b?D5gQjr6hixvOGe$*X$KI<4m26&+|9rnfb&VzF zR6R336BKKED-qWqBpc)G#yd_v+~5B)eaFz~lcPeoTW@9_eU=B`o`7u{!r2TUmH=ZEQb9&26=CB4pbw}A>bG;>Q{nCxg| znQvQL!Q%06h(E+jUPq2}m)mkUrJYiw9-ukiWxuFyple_qUs39mKOiVZi0Zc%Cod=# z&F7xN`ErqB@ZV3y%3H^f7k??x58~E;ciiyk>k=sAoy*F)ToXS;i|xPb^=Ept|d0-j_pw=29sD?`u9qb2wwW{%i{?(EI zbv)?>B;ZkDOQ+ZQn=E<3J3KH398J(uv3_|M2qGHzsH7YI+vLP9oI}F=^0EFZ+%`G6 zD`n+JLM!3h=^ZPfgyo)!CPcmiwng6^>(55xq3Y#m3g!|VZMrs8UUBUX95szUipQ$( z;V{L%)cQ$mBB32uF?^G*lAjkDJbYi9x%S<}1mk6_E^8T)OOtlIe@uFBRu*<5MNwQ_ zvb0)|(D?R@kouo>GgC#W#M?Xa9)bBXSkvTUG$fdMVd>EN&J z-Wu>S>;j_Q*-h<$rzyAGa{{*8lZC*Blk#2~qC9ry+ZV?&WZJ5Dx5+X6{_OUs-Q8eh zcjLk3{cUG;WtXBYrgAsTu#oWQ@_eV9%3OcP;`cW!Y^;P_>7E^GC#%{WzX;4>r(e{2 
zjF)p7KCz=h64mqYF{9@8&D5npB2jGTb`#`ewh^vT?ua!^!OoXS5Y2cGf8N_1BKEwn z(#h}pp}$LWu(u>!@Ong6W*nh9|3Qh*nY(1_do!C1BYcbap3gegM3mU=I=&;Q*8BWK zlW>*w)4p-F(pj5Vf6TZt{)hK&mnRj)FZFfC)+MWaP@TJ?ej{ydEQW&-bd3gw%;BT% z{Wq!clV^Uuy%*-fQhKD$(%bgm10=%W)!#OaXhnubL0{7F-)_^GghvU@wYs@NNB=&uENun6Gd!ArfwT-otvFqJ%Y-&;gr*VW9TjTmhv^n;`>0z zA;4a0(_Rt||9u}30G?25HV$rgeA6d+|CB#&{-apzOYsYwS3*X+uqAz8*kjoigg}B3 z8J{R`r#Y9)z4Phl7`O+&EYP@67$x_GXR7(e0qBKac@g*aUgp}_vAr&*QaaaUh+KA` zI&b0KdsShYceZdwi`9EVA+$tNtz^DBR!#WJPLTzp?0l!PkWuRovp7Kq9{mqqv@g`l z#{DXJ(%t;ue`$}N*gmE>&WDfBhIsF zi|WC8P^`+VydTPT=m_sg7k677MwFTjn=_YRI1GA&2}c}7k&Rb;&&|540C=(0BflPnN`UUZH$Hw}R^u!isr;Vjy>`S_EtD7E8=PLWrf+V z$I#BwXKvXNvaj7d{2C5H`!TdGPdcO*JmT7~*D)M^89VvS4KN)5whYO4#_jh3_GWopxW6tOm0m+z3sz`PUG9{o;W&@L3s zo+mFn*a$*l-zpD_aZ2yOZQa$1AmjiCndN-|v1J^iuu&3_t74(R*iFSZ!1>?-rp-## zsBzbk$q#xT0eI8l-PIlzAyL|2Sdyz@fw(%l%iV0{ZUwZN;>zoh44+BP7nAneVD6I# zzHPYpHvhJaw{Q1>o#Zy-BdX~!Adf|rdchcT0K0=?E!d!^hbBPMjU2&-GT}gH%|W)} z2cz4a=%8V#RcV`n*Y7z%S(x*q z&GGaQ%qJvLj|3-mB_M2=0}D_0jjLY`XagL9vhY$csrDt|GnbjrC9;m0&q9}^(Ow;W z{rZ5NSIhuyrk9uh!(m^}`p?@XHJPx}xN043oveyX%ht*q?KMXWW zE5z*(gEtv~M_=Ly=PDLs{GbI@tS=<*o)0r%%g^ZgOLFBMa#dFnQHIjZqn^(Y476ur zz|32hs+x*Xas}nV2QE1WpNHU7%?JLkvrFXs`3^1R<=>P;sxmDI$F>L!0sxaqULA`I zU?>^Vve_-sU<)!p@ChU25$dqsPJAEb`^5pcV>0CZNuGQX zH4I335dyeZBa*;FM|Q(Rwx&X$277BS3HDNXs|(8H8@Kw5MDYt$v?t&5bo$il^hX~I zb>;?7(mbX+ezJ`9Ng^5Ug}s4Sn9aVAuVK~MFXmng@eq=i+2>@f{*G_d#sqX2T$K6V zxDh{NR`DTzUcoUQVsc4c)d+1?viY@MyS;GsP#f*&pK+IaQyn*i5zfL1jAEXe_jo6~ z#{tT~j{w!M6nyn6FvMhlQ9Ym=C-`MAHC*k5xeaz;-ciM3X0-rRoAY$ zdtcs{_xP;q9euO^Td!ETGFICw)Nf}vof%h(QSqzB0-HKLc>p#ZB7J|=#g5_*@4gQa zr9Z+$@f=5_nK;Dj3QCR&FT=Ti!9W~B?k_hoORg~)2=wv3;YERf`8$j$t|&56ZYHGS z?Ab2adn@h=)A?qQpDfI-L8uYsNbtd5rCWLcRqfv-A;I?e22q;|~zB0OnETBS_CB3t7xR2!z>e=_F;}Phb$50FzvA z{n=VZQcg`UMrc?bu0!NRSooS9E0bc^0s8-AY9b}k(Dl41ic!%Xh%r9&Y|S+})obkk zML&9IYx#V{c|o;rv4LmL>MwTvzTvU_>>dMw`e8E6Hz&vS`FE{LtLIykuu^`6M6A+7 zH}L`;^P@CFJ6Si%Lzd6z9zoCM%or}RbLJVAYu&-drqeJwyVt}=e1$9deN7IC3)}yH 
z_~R^-7pH5j>(PSMraqQaaYA(V)Do8HigMS=Dk3u^wFnn;J>kwj$K(r+1C^o7j9laQ zucZZ|SO)q0khhbv*YGtE87lEm+r9S>ZST{61FA>?zx+V*ROv= zYIP_B0|OQauo=^~*D&C{4=jDsrFo;t!G{U~aPNfbPov=~Y%PwZiPsum1Mt`4v|WYK zQ2}AJQ(yw{H0(M6j>f@9_++0L5YCjZA<*jr&_RsSZjY?$^H)BOA~gFGZXtW5Dn}sG zsJCy&DZ3t;G!TGIKr}fXkfgAz77(Q|SL~{Vuucp}ox*KgFAGuvK%?4}OV~<@usC}H zW&1Xl2yv@+H_%&+ZkpI;FWSWEv<1Y>7V4SGI;CStT~YkRD|hyJgx%G&=Qg?(D_`HN zuw+(xSKu_~KFrX2=R^a`x~dD?P6csS|NXT?V5bw-Hfh2~@2eF=!ORCIm3Js+a_wNh zQOx!l`fJs)ayzJr4*>u%AHSeIH2US_@uUVU4ECBnF2a5zOnC)^5I$JY4Y?1m0n?8N zY#i3)CeH@O3SKL_SSGEcPOUZ(=z3DE%vRd(`dl#g<$I5V4x)vdS=a!YXb z#qDKKNI&V$C>F|SS*1GFU|}3%A$S~Xov_Q&Oh5rV17qpf)F=My=jxgqK6*Pr=f)4X zK~w!yUlDqj9r#OJg)%zH=O`l;i|6WHxlSvDYoZeh z4ield<0v|*r@rK4rclngzz)s*t^L*)7iauz@BcKf-Lm3Bk4qs)wd-o1E zR(-iOG#;PJf^RxFH2SB#Dr*@+uv$onc~K*?dj|4&j4RI@GoRgbn7DL_DKRvPSA)N} zu8GU8KQLBJqvHWYw&2!=5EV|gltp#F@9NAoDs1;L_41h8X&4wg};-y_6+@tc|XG&=nZAh&^c()96~olY zVOn15vv)RSBP*L&=lyFX@9~~@db?NFNW|CN=ccQf+>0x3IHuCHr|o7&MzonETa~hb z6|TvVjUdbo_dokkt-J?C}B0Zlkvh1wu1O z=(bG~;hWPa#@Qf|OgJcG{{}{weeL3-^*j?Ci*pOhe!jW&IlqJ}7Uln^twL>MeGQ#& zT#U|Oy8;yc-p2b-w*-V5M33F!c}F>N+N5-C{C&71V%C;C zkp5UL&i~Q%_Ntuh4_gQn-Ci%~M{<<1E#@*FHNiFoV~h2rnTT_Kddd%_`WLgGcxV{b z$*%NJiuFABB&Tx^CYv`R&4OjSd`8{Pibq#4pPDFgCAhToXq!k?Y}X=53bonLrNcG> zu(yQ*&}CFa%x*yD3iXip)KHkxJcHUpBAS)ABRN6dnv=HZ$XYi#N;gF{`-z zZjn9kvbuh`bK7rE}hvYc` zCeT`iq*EZ*%TDk-r1-!Yi>|SN%->v&gKft2jChY2r2Nm#&7T9Qgjx6t)#InGWLWzC z{P~325hT5jC2xtw7=rGsN8jVRjg3tOZU5{)tij*1W914AiG(n87ZRNp*klj5zh#fr z;b+yZwEChLY}?FyPVfCMy2H2o{k>1^BBYFeu^p*0j`WF0+=Pz=5iQ4@@j*n!72YBP z4$K%zBO$fNVQDhHqk@&U8AF90(Lva9Ai{2wC3Yg@F4DX0kao)dxV5C7-?TS?;}GWG z^9&}>8pBY)IVliLYr%q+4!QJ)|8walGC6h%?c~(aevas-cAU83>RPztMHsmJy6_uF zN(w?88XEe$!}+)fC2a6ty`)+dP^RZ7^(H6m<5rs2VYT?J4^PGzc2PBSbzfyapENXc z3DSIf=A$s+0B)HC-_8CdSzszd)*$wD=%3h* z`fEE`Z`_D(TlvhbF!hsL>{(O+XnTrGGmszJeTDwoq_k81{*4>YRQ9Smy!UZkiKE}& zioc6OX|@&IyCw(|-IeBB)fVB%h6aIb1Ji<1AzG}dreVL 
zrFVELr*<5&Iz~1pQ`$~XUKEv+>tXowGj*e=X40Iv=RXU@tFJuwA}5U2=IYhtMN@#?n6Jf7Ai&0kkL~IDZVorki1ewl3wS3HFhCWh+IMI$$POTyGC@>rk*WC)Rf|qNW|LNYQ zY>1RBfu^ssn}h4XeHcN2VQ^~&Eh6;%*{CqUxUuR6h3Ezkh#dNuJZGlP8@^R|Nn0 zEB?ACsdkVHKsj>26Icd?Zzp>5OhGl_DL}r;9?fk4jTK`$khiVycR|A;w?+%NrX)#h zwy20mxpPtOp{*bTSm*X>#^05L=k#MLemh#z5$uRXh~Eu(te?4mFFgF~?0Fdo>p2D( z;&YT>w`$`0@1fqdXTkYHqb~!D`?c2 zx!hMmHhXd^ay)twW6M`Uy%(+oK^&6+&f7w+TMkjmxbQP#H5S026?AE1mlH$Nqem|% zIte+qyK~yKT_BxqBXZlzcqA^I3Yd}WE~!?Z3%hyJyi~H`;9E&pE~hqnciDpCzlAGP zJ8K?UOXse4Z@VCqcz<6m0IoNE#SI0=p-1!AM}`d z`EysF49r+I=xn6(8JZ0dPV?x*-n@f<6B+f}bb2-|s`9QbZ{uBYDRF!LQ?`tq-Iqoo z+zxcsTqbRQAy%Tl*iecRI*{=}eYoQR>7ad%k57nsX$bdcb{0MxhGWzNPK>UUrGSuz znTC73=4)?)Aq3xz!NehJ2!A@4yt#QL;qo}9ng-jUgZA+cnVI=;PkI)n!6D;cqDdVA z=U&hM;qJYoqRh6gUsMo5K~#c@h-f1!NhK*r786QP$x#s`6+w~|k_1FSL_t8wIphk^EWpV?^5yK=m*k} zw_i8URt@GZwW~<(Sq{pzDLBj6a{T1eEom*D|5nZ9+)V0T%T%kx#XXec%+_SM(IriKnLA# zIN89;7GpVucglxah*;Lcz<_`SDGAc(U+wc+3`r^h(ywzQUJt3;=nwT^Qqd$K6!+ta zIo`Cw(`#=W(`VZRTW#fY&}*Lp|Eq&TC2g7T7#yfJMUkcwg_3czue#~{{r$m7$i~8g zQSHus$e%6Toy;kzG=&tX3o9S34f@ZS*W(I{dzS3s@@Ps=)e9 zKmL_AT#!bJyuH6OINfio-AmkP12!bK4ytcvU=NZm`7)`VcM@UN%dw|Vw;b8e{P~vP zZu(jOkx`g^Ng%!+8ZqLSeSYJ4LGRLsE z8vBktQIB>|$=JoZij>{-r*QdXkB*r0o;sNupZ|#WlwQ&6*Exkj`624J`*O2X76luM z+x?E}Xhmw=%5mcC(bxZ8*I{^wg|y^gd^|Sb81cDH`{Fgb3yd{+&ssac<%MPORx&&& z6xF6p9m?_QLkd!tQH3AC@O3k&C8nehU{5lNZD3FtJIVC4TWR*>t>j(+zD~QbK_d65 zQ}kNaaW(kXipt;&CFTrKq$Uc*^7fJJ)Qr2Qt-#)WBJQ}ijbtG^h`ZpC5Ad3 zEmaer(zl`ANy=WLlA`yQ4Z5YWd_R7uTb8#-%{pye3f?@eo7c3bj6^Po*XacA7ps$s zn)j@PTvHs(5A>{eIhkI6T#AaU>l6aJ9n2b9Z=Foc>&Ak6hD&Wi)$ z*mJ>r(HE*?HNHDw7sfnQLpI9-0me3a=Bif)VgGI)*5fBU)%y_I?Zh?qT`Q~e#c58X zs4~OQQ8$7xLI|0(S!0yKrv<%!eGek1r=@NJ%?;`}nHaTVPeztTzuXhM}s7Fxf- zAlAL0-s zogBR!t;(70!F7m_;79sTJ_6D@i#ko~lG_q|2OeMPaEXqH&?(21LS_u=D^0@o0tb;6 z<##~y@IR}JKIzA5$|}?-Z{D0*HUIfn`Pzp(#7gKzlnyab_m%6q4+niHU+k0#jWMMA zI=csm*Z6ixl<+p00DGajMfdJd^8vMjI5tKW69c6;Mx}3Qa4mE?I%@lyPq!PW>sg78 z7jg2PI*zA^-+L2cvDN#Hz-vx5R^vq?rLbRe>ZrufH-bGOpb5@y 
zWe!JaCJG0g-_fx3f?cwo(eB1RpD2fsEV(wwRe(k0WvdJOq|IF05GA+Ykvzy5`4-y4J@;sOQ5J#EENP zn_39BlqEN*mlhpok)FnBl?j1V#jcaLO-SC-&~9^t>QL)ZIEwM#IyVC6>>|`yLZa=> zBVlo7%*x#gX(ar!XJP z#)(qzCVe^Gads`@5X9E-^kE6~Y3wI;2kgX65{P~jjnpF#x7I65uTBcNK$lnV(}B3n z6A|ijB}>lcBFyVW9(K=#Ju4dJ1+O*cTA47=}<5#P=4 zOkWb!>~4A4*bUY00by9j5vDf){ji(!V|;odHmv}&TYT(@b-xfk%gA(_h@@n{`@M%_ z3wx}rqUK54C5Vr{aJk^&fHuK63xPDPKF?7C0vo>TblVGUnm}Pr8nVk{o4IiK`71-m zVUy5de92*9dsacd%%N8A)@ylmIg@S@Ii!}mF1`YVi6MWCll@`WS!<|zRP@9 zZd-VjxIi4^6Z;U6SP_-&-@q07p#DXX`(H#&Jlgrlu*XKJ(N~}B<)c3v2yoFU;T~Cj z2u@8RoL2LYFWh%Yq7s^z`!s07cQ#ey-^Lr-#b#3tfYW?zB0QxUz`Z>Zk87`Z-hqO@ z?@r2A8H?n7#}2#Bs}jIKr>Ez+bKzXZz8hN$iPW<#g^c}i^1k?eU1|tkuSd%)&hr&F=Ko8QZ|R;2Z2syv-{s9Z5#npAmTtbF`M#?)$Q4F2`Z zKnA}-Q@Cjl$v%mJqnkvzP&?_7&_t1(o_XK_SXR9zN4$~u`b^g7Bw33y=jRemziPuc zUh?N;l<2+pveSQ=P4Tw!x3-&=?r9EQKKNeW5?u(>1pAX15XJ@Enle>gT82Fz4(M2A zjtm`_G8`X0n_h%FlzD% zMO|0yHG0-E5BEb@2%842NB2=k(LvRYs-dKf#nOohF1j1Hv&>T2XHrh>`si|2GUm2O z_u=@P!_mbf&ngEFRx0QEZ&ZbkIw+H|{#KK2vVvHpnajU6t{$1Z0A9ahfjzN3lXdOA zOJ2Id0b6sr*xSckoyi-~r1tpw>?f=Koqov#FF(PGpUG3DEhP1WPckS) ztd}{UmjB%EdA@=Qw0~^Acc?IxL>Iq zmNo+cMDC%s2V)P6dvxsrR*5v&RoQz0r*DMdk2~9FF^1O4OLgBSBKL5z3}X^mHha~J z+XsBQ)IvhTLhhtRt(I_naY#$YeSfXQ8u7B^v*+m|7MAZ1hceLV?El=UE}vi&_#c}4 zhvYp&b-gLUQn`@EF+hQ-JG}0Ads3XevqcCpN^%}Npxs^lk2duifrfZRHYe~fbM8F= z+#+aGlc8yg=$U9!m|TaPvSoFfed>T+m8?Gd!Gozxfb;Bn+^gV_5aYkw(C_Z)-CTVs z&zMJqZ~*-f zMXeXn&bH={SE^aZ{j&xA&kbU+)LJM8K(KC&(j7l-AzvYT@xfGt$|vP8gWH1isI=e` zrF{wUYt z9=XUui^&q!nF8djToB-#H1i_S-t8!O^?s>3AZU~Zm$&;t%sw8+bU9myM&kjUmI3*QT@4QuW z9tfUl532A{YMQmtd}}a)8B)_J@4RTeO5ofthmY(O9Tqci#aLIYL=$`yFy-#?n;XIn z`yf4=hEgjn1FZv3*d|ZvYniLDsNHe5_N!RDq&!B7u#wDJyoe=@y)jjsVB$=6mW%N) zUtLOgCTjeX?Ke;>!;ISFbUP(IR8~ARWhzd?z_rU-OByc~{r9$Swsi|-d2!|;AvVvA zJiz*IjlKHddS}w?Eq&SOrttNIfc?0DkM-OS6`R$n5BE?^>{y7wZ-)QxJv6nBf!7&o7$rRmFJSu?O}HL+bgsL6@9YGv8la&BlB0jkDC+|?3i z1E6ZaV@+u4X`68gizQ+*+E0vU?G#dl{~vw`BjKi_s#T z!g%=|4Yp*ZD}DuA0t$UIEnB;S2=zJ-e5u)UZ67A*xd{3`Y>B`Z9u8&xKG81re9n83 
zWPe2&w{)dKjIwWe;5{-hgFtUp6$!w8f!#1P_nn9mby!rU4&)n??O*Ac|nKJZPok4<0G4;g)7GRd2aG!Jla5p*8pL>(`*ItzM7N8+PpO0>#yY`~u|rtB&nz`*@d{;N*dZZ9 z2y?o^H(*7hWaQbq?ksm;mE6e3)UJ{2CzBVG^)v?}_DC?|7&(HobY?ndC1(UgS_pX( z8bMo_w6yQjdq?b~rzc`gT!fy=9}C_?jH*SMax6w<3d`5peX{q^4l!D#PpBr#0H_}H zkWJWTy}r6wj2KMV>(`%`XBG?C@zDE@07M;1s~BfzXR9{?1uM`_K~VKu>_-{qBX?d5 zl3ShV)0(rVq7A1i+%CTSX|)^YSHdVd@wLP9yIabWs>zvNQtz4YoK$-HSF=|KkkAj^&Wl7dthgD82_TLa4;xu3YLuGagLs1VwBMe9}dhy0yF z`~|tMC>1`q3+{QtEd0gBr3bS0e@Q_ifRd)>KY*pm$HKw<{I zoUZ6EXyhC`#E&|KRNk*0n`yC6J3ULKTvvKwp)iH6ll^<4gV3qPE9qNcyHIa_GZ%h#4?ejLh$i>|5TSz`oUqMb6^Be2R! zS4>AGFXwX~&(z-gEr?G_H1C~_#<$A)4?Ms6v$2&(=qYf#ZSS|r>NtI!_{GQ{wwY{O zA;@A5r1Q{U03iXqO&MPKF-CsRtzWhP>;b6(dx6g0-~33`qY!wG(@sqtXgkbd`(cj& z6-TUMK5jmmv+ZZ@bDCDXX&!6y%{fkZl8t9dxoW+tahGY1`1|>-g?@bo@1ODNcxZTU z+s$Bg{*zbDLL*z>-5ffIODj~qUhtuWS@DKdh2Hz%Zv-7br|RuPPiIW0f^KSwT#QI@ zjXSlHxev8lHCcW)Fl0WQJgsf_>`}qz&)1O2cMAIBjt(4O5G@}She9EQ^T~KD>HKJ< z5$fM&kbgjX26wMvFX|q^!*m>E!n$;zY)DVf$jpl|2Xl!-6xV{=8s&5PXCwvqK@f?q zbNf=X4Wpx_bpYgwpms(02YAR7BzV~AO*AZnk2}4;fs>8xKB8Vv<-TXk&NiHfO^_9c z+$uBtv5wKAn1huPsJlhw!Ftv6GK&wVJPXK)&jZsRPE!C~`dx;miv9TP#S0vgE0sA; zO-!J451{t-q0-yLM1wOlVjMx>gAxzelWXQ7O~<_Y*GBnf-wa>|_wfU9tqDpK`f#?9 zW`33EdQ*HHJjGI(3X%`&>*`i=wr2U%HceD&&E$~o!+Tjn!6NX8RfEhgF9&EitRH4! 
zF-CPXRNDia@7R-ZnXmAv?m&-P1G5ei%|c}FK_X*#gzuXgFde9p92_x`;b|h@TR~V} zR7gW@Q4kC3-Qfifb_OaUT1mC8#r8Z76`U=#>dB6hc#EFF9pNb8VH#pjr4nY>nU$&$ z1wk?$$f~Nr#<8pY-N`oDIK+KAnG$(-bHad7*9SGf!9doMbb@8U6YteF`SD0;2)7|vT4(V4*4pw}SXfF4Aiu?qz)(u{{c{O!v~Q_B zV^r6R!e@4;jf33^V7g(CSU66LBPY|QjM*oWri~kX5%j82xnt<(`c2HmX1KQVdZHkC1Fj0>4RoPyB?Naku}}8sJ3+6B7_Kb0;AUPCK0+P2 zhaP5J3REcZ;NAMV8@@pqqr#k{pwGs5nM+Cv+ZUzRU6V1woZ1plM6-#2(q^3E1!t8P zR0<9kfD-fc(P21Z7F%SO)`5&}qWViN@h?zP#gKuu({(wScc&X`fWE zw2r;efvZrm=^2eg<4^`Pbek7JQ&D`Z8|ud7{&FXbUZr?7RHch(x!_|1lmXGNOUi0J zQH>o|&WXI=@VjB@u@JkBNxXIc7$z04RMUHt zkQOCF5daVTI=C3YTx!w=} zG~{|NpSV3Ie`5C`-v@`)1wX>LS@U_Etaw7dnxq^cal1qEc8~ z!F7=z{I}B;b4pHf2zI0sw-67f_woU|2M>t+;OiMQ!3-oOCTfCsNE-2dw*0vho}ON~ zT?!*g3k;IKrvnZ!US{j7sTm^$NE8&^)d52FWISkn;@w#Z2?DH5rdiArefNTIOhO1! zI>X{U5LRUz=F`lrLF<-wLfPO5@Q@HgK_#bY;tODH;ja$@JPBTw%EROmtsn@#1sVj) zF8qSCoQ3j*AKv6)E&JIaaMRmYo(HucQnezJ0(z|*^ZjpLzWi#%eez_(rNF}&xJR80YL&}Qf0Y; z5C{Y|33)iYIj@FBi8BC~L|rJB8a>==DC&C^LQ$rmuxWY&kVb|mHf&XjdEwp?fN4@F zE^u*UMRtH1D!79A@$LP4WymtHh0SM26mCM~U_qL9$SsY6Ui>B&$dQypfQpjr-EmdPTFI+f9j+bAM;wee_DlcN;z(thDb&5J&C=yy>lfU#?4IG7aEkf?lQr1WMK1<`gn{ zp252$f*?6SoX*V8Z)eB=)dgxia#(JmicM14F$M(!96zoX1aNXKpp-E|^8C#^k6AmX z?doT|)NTsS8T#Wa%)?mj6xow}4R*5M9|N)Hr+>T(Aa;%Ht-I;tLK7D~sc#ycmaT<5mb> z7D&m!Eljo!h^J6;Cnn5)g-~GvYnx#9`-ani50HE6_qm!XRvpq9G7YNT+?q1MAHKPR zV=7F&eBUPmRp2#x-gR-hExkL89pi$%d+hmk?v}1&;n!kG;HE?IbQ!#w!N59$sN79R z-H05SA9}@BE*E+SP14e{^DnszKZ59qANceh`xqI5IetMimOq2tN`3bfyp6vP6jZsF z#6Rt2XD7S$yklK>ge=1~q5NiDVIUhU*EpR~votRj`W_L;BL@y9JC!rPz zm(T}w5f(XMS!#{+&L0IXq^iEY6-2yEL+yu94lu}n&##*IJzMjEBnDzFq#e*;qlpFN zn!Mt*Yv1-3z#&1F=Lc-Gq#OZgQeIXoj}|YqYx?~8^EoYefs4<(<(aLz8#F<@wxL4B z{}y-V=NwpUKnQ13{GT6)>9+U@JLuhLLK-HBHmQW&VcqUdyT!9uE_ZRFUu(duEXyTd z$?|&PNA2sHZu1w~+C<(>yh*i@BoDN(v#{t%@EwxI>d#!|$MX)ZT5NN-?qCcX>wiWE z-jKONZ5u8_O$m3_+U&^Z)oK?=W6m3Hq*>Q#bGLZ@^k=A^5>b^<_#)nBF5kFuEsjb% zAJ|t23C4Z=c)J~JcxudZ?{AmMNak42%Npm&Hn}eaZyp|UmsbrCxGPNe!LyxWw6`_X z2>D5Up?Sg`tb&ZL4Vpn_KFc3TLYGDGhtq@d?!9GU3(+=KP+QtiXPKGFI+2l^-%hXd 
zy0oxu)hB+ebmxPe7NqYEx3$!Oi2}g@r?2x=Hh=V7OwRwjS~&>f@lpc8UVl7N9bak7&49A}D0tO~A{m!o|7bq^F zL;+ICW1D;9lB%yAepOO(qa2BDA&QbYIVek}?gfX1!J{8O3=lQMi5DNc-h)uxxL(Jn zdJj6%(2`eNs5_CDTvF26EV;6sVDEK@0wSqp5Rz=3dek)ZhNYZfFSOu$6y?4fAG`EC z*V9Whx0vUVk?oHkZvKpSBSL-~+Wy%j5I#33^FQru{~whklmCmIt^eUrfFl@V%Kny; zgju=_hiFWG*rzpROlct{nuIh#j@lC<#I!n8A> z0YoyJDWJv9`d)gX-w2beB?|x9;(~fO%)*<=0l1IIsI)J%qGbF@T%BMpPcsCRJ%R z)hQoU{iUE*94p8!tOpQJA{P57%AwYO5fz&i(5Qf zThNq*DYw!q*9lqlH8ByOObm|4D7agzsyyfd-{92&+-MfX%}*m# zQ`fTM4`hdxYKJt&zX~Q>$5vTdM6Dr8wJpT0`~J&i_IK_DyD-YVkhAVTCc)GA#JVg% z?V+@TS^troBP}LP8*P_%5$e7T?Iq?HWdD!gi`2liKD!aTRYINCwVQr7)-BT+4qW(P zO`|Q%ksv^LKk*fMDt4z$udJSJ7ANzOyHb4D7o*uHXV-V*9?kF^ zXcX45ZrO3}6AA-=JQDu2@zFwgza`17iX&_HjWK;?H)g1xCbZ+=ktCCwnQ^{U8? ztSI*t7SW+{&i=wGFuoEUgM6uJs3Ds&aEDb4$K^F8FYd3$xe>6C+@ zMB1vU7K>)vj_YA%yoV2}R|`0B-YRx=u#c%RO3EwiWSpDe-RWo9nX2jtTb1%b>{J;8 zk!QA;dP6oN!*+ia=g70}t8=v|N((}|DZb#gG1J3Lbf%%a({ifsiC2TyLX+E;!1LurKuWalbqHOF&pA3!|xJOu&Te-m5pztbQ1BL#U zJA6Vpb||+2hT;yfeO6XrT1VcwLUp{AsomfR%8!IB1->(98dkS*6Dx4`U(ZOL>AO2* z|C0Z;wbqT2yu{R+px~gjJEn~GDnDRQ=OgD5Xa2^<($H?U2aV0mqI&v5U;MpZ0tS|$ zZy@nUx85GQ3(AIXCRX^mDlOCW1Gmz9dU0Tt9jgEORgk>x=!C6yTzt*L!kanufAR2? 
zW1-$HVG|V{Jm8#*`I5+A$=uQRBKGE+W9e%v?maEeoWc-uLi1b@Z*k zefO6^Uh)lZ=YGdI?%@)vy)@6)R)8O|Vo`dbU7P1?KZrF$}e z__uoI$thw5B2os6D4!o@c6z3tC2xMYq-v-uA;dIFbE$y=b&<+}@hjN{6}c^v9o(l{K#q;dwt8GQ>#f7Dt?u47II_iSwU-bhWtB$~ zHFfn51I&NLA;P0zev81CPCvi=$F&z$T}g)s_6hua0wfnn5;-#?9WY=`%MDn2@;G)d zM}pCzM8v5%#_?11ea&k(UBJ?4cU&pb6#0q>zCTeLa^KRP!`=~eV#NyJ{PVXF(&oj8 z6@oW_@xgfe!ABlFH~vc2cypg2)04DwDslhWdsi@qLYnsZp0y%bKZFiUniUDmN1JgO zEK5$%#JF0(&V5rJBWVv?)nM4wvy zbFeX{Ea3G!H{Q#XIJds6qO^qxYFkzmTd7;e%*Q{6ILZiPh%vhxVwj&gI@elN0JX|%g0~*{@DKYo8bM#$xYf7ij6$+di4Rgl*%I487|KR8y(Kb>6Gw5{h zlqkW{2*sH$K-t_{Hg%8HaUzj*j};?7yt%Bx{?-vVmU(>Yh_9aZdtLRC`Dr%$Ud^gU zrDQH~rX`i#ZQ`ccxsfNB<6ri|p?wIw^PAXQ zATmZvj4QCu#-H{UHDnIor8}+aY3{QcC`r-O&@k0z*G2S!4FAWB$d0*AHu&|x!DrX~ z=E(8KUGXySuJS~ES0{GKzaDdDuGn#55e~Csjz>}#t36QF9Bq_!FhwC5rE$IHx)!6{7O3LLfw5SCb7+q<$V9H79pkK?v_iQatl391~9 z5YKV;-XF!;T!(~VF#$p4Z*iV8SaxlZ{?P^KQ4Z}c?AbPUu2`$6L#g{Vz_kdhrK{7O zx0D@ts7__34JUVRW1+04%pM;c5kVuSZnL8Xj=lD)WfD_`!GsiHVAUYp^+5h~AqO-~ zcf&>|xfxc42@a=LQg=10U~c9l3vMC#SQb&8LF}^2;wSzu1u^dH?{IzQI{tyriqo-! 
zBk2>@RyTZSGLdCIMKR{LWq#vd5>_yAU|xv(w*R=w;dQ%2h?%fpdT0>TcbGk!V$0{< z*9Z)+7{zj`GOkg{WT?4wU#q0=*}cQ`SeeyR^*faSB4Wm=m}XlkE7H$wf7}!wWwEf0 z6G7Q9xmu{miMPo+F`MCGd!&~S|K>c{GuFdPNp!%kKVPjbA6JWXyE$(qb|9eeiiq7i zBcDX_6UGH%!li(p79)@A=r5CZYTIXQTXGPbNk07)>%*u*rA511w1xFFNP>F29BaS~ zJ_RGoaga)NhwM8Qnxt)Lxs?Dp5a6fBZVWQ`Y~3cz=wr5(xmit9Wste;p-2|<(Os=} zrXSDCEOsTiv&_<|oY3Qzardu_qjK@k_uV?ZC7h8yiFT~CYwlrxG~2vOzpb#CMe!L` z!8^KP6za_GaD3vkA-$YYYO2_&w#CZsQDzW96unphj7U$mwYjT;L|c&(k2Y;5wp)5C zk#3moK7Fsbt3>boB1Fr?FN~;w?ptDZMP7Eun8;wJ1I2GDneS=xEPBJYR{I@pnw?G) zdST9XW21cR^82EJzIFYa%yxY>ty?)7>1&j(fP|4BGK54U+XFDXohJq<)=($b8+;6X!rm?xl?`~$ zh%WtrYp-jPG;C!B?5j5C%oF#rqW1~{c3Y#TIS6)iM4IYtVI^fCJSQUX-l!E)@|Az{ zm^pWDE;=1t%DF3N$(VI#bS0m@!Q@Z zb4qphaF?RuHGZz~nXfMVGkM@DXC()yR36XyshN`%pK0Q2WfZpDp{-TAC{}M#u@KYY zVriDw6Z-P&I>SaTRX}lJJNseGnIHNe-7}}zI4m5Owisc4*3Y!r**%5-$!iP1KZj7B zS>@$uGUU%XKb{}STY9rAb?51W~(LZyt1SJW2-u=upqQRzqdGg&kSZc7;~!aJWEa@O9`eP*DvgW8!TA+_TTF zs0n?8EBs_70wW-sHr7_1_yn>C=_+;#v1&Rhg)$7-vuC9C*x9{a4E`s5LtW20#nD(T zscFr&4B$l?VWf=b?-z&{HuC@vNs1-hCA8h`xgNNfD) z=I-;UeM=K6@z$Iwd>ntoOYWE_i8 zuh_`@Cd<(5YrY<;RZqcm57hbk(z0j`*+c|%q4=G+oLtW&+7ccTN1SRXMR!zcn`gTz_M4V!>sv3O4 zuB#~-Bpd^L4S&S2S&=`g$U~i4zfeO*_v#y?()H*`sTj{-uA>Tb83J=T`_4>IYB=ez z{AGz%p$9;vPw~C93%1Q&!oJ<^?cXh3bbSM+ zN8KZb(Ij)vG^Qf^aT;`Sd^nxIzjA8?75=V^(JyTKA2jk)K)y|wvX)z7+FatZ)Jm3@ zd#2U}1&eQ4e=ZU*%XktI!nE{VzQ7tCHcQ7u3D0fjP9bgsi1ML68(nxRAv&8yo6+(} zYW>)oH*fGSqvgEuSJahpo*udC6`kZFtibaChNj_;2!FrN^lYwG;7-u{q$}yfo0&Zx z+qwuwyL*7aNh_cE@>U1~e05}y(&(92J*_5blp!WRrK=LyM4y$RVd!)8n8M5|GMoZ| zy6NtaiX?M}?+=yvwJCC#C2d&l43}naOKoc z7tZq4;-3x^G%yL~sW9Snn^)DWZCV)rD~(P6;XBtS5$_jwGhLf#zyfl_PD2N^XPM4> zzL2s1HU(JiJ8%kd?d5EJ*50!L~NP^ zXOPj<&3VabG3?!nF7M|9P1vLv^Iw2VXKyVp4$<7_a@x%2UzeYWOM6c_#PI5#OXCN8 zucWSEFq}$l*l$Cy0M?+p*uQ>Ph{1Uz;Bv{Ubw7OKUv) zvlw7p2@p_4WTAvZHy)d%-BYMvQDc#2mV>dbsN?mltT%zL55F~!TQhdQ$r7U?7rvyO)IrC32mEyZ| zr^h$LWd0B$u-GJ{Yfj`)Qa#owvuCbGI~_p<2S0)2LKi^ z3JD28NOt!V9-$^cSD40uN)6$e+@M?XFImO!*D=Tm_~%7^4qj9c$l!omiC!QvH@7vvpnxB}ThK^B+!H8a{_8gsE??FG1xx=7 
zD5t{8X05_~xfjG^H{g5h2+1@K2D4e&_FoEVE zi0cu45iS~wQO#)rpe@o=Za&Kr?Vr}_-yaOp2QtBF3?JMS^rhL0J)qj+Hs;Mr&8+de zroqo`tN_^{_Y|PhsE%Fi8T!n7EwiMs6~2nH-qHj&5%=b@&ic>M`UQ{K^84rh=;{7kK7JX{bD)wjVEHDY7_;ar zs`sCSS=OTDRcgFCBFthM;9f-8Zs=}~5_kCN3kVNGhhBi4{Ob`*^E4o!2pm@0AZ0i`sDL+qp1)Od?-nV{YbJ60R`z6@^Hnzk0M$uk?${+9uoe%Vag5i*a(+Cj(2x zpX4H!1r}#TzEht;sLQJX_!dG37CA=v;g!0d|MQc2)HWeoj@s?py-?d9*^LSB9ouga z_S$?ZNaA7f!ZkS;#cQTLHYL1ip_4^Cbv!QB#ZJ%!ze>4wQ&&RKPdV@P_mwH^BR@n` zO~9Zk3cr3uZ+k~a7L1bL-yGdPzBz3V0Kl??Q)Sh)J5y^KHy+Q=eWnY~lc=@&K~3?& zU0TKG%6XJLi1%@q6yU-|6)C5uLA0dU1+MHUL^%DBq;>PIOGb7!#elZ83>Kd<8#_2P zWC57we|D?4Kd0hL#3?UdQSA$#DKYcrj2Bs=e|2*b)Tt0|4>3@{u7F64u|PZ zqzg@XD`e5C;{HBTyMT-FTtqX7S||B zD|+Jcv{Pu5EinW)q`oR@y4urC+v`?-*n%zcf!9>t`#2<{FULqUH%kp3WEWfczI~|I zY{|`WGVZ>Pt!kc{nnvkYp4NkC2$-})W6)IxJ}(R>IG~93UiTlg(i=0G`cYg zKHSi$FsVP}@SI^xx~7sfwrCmjzi9q)LR0aZ7I@)qluQzGY(g+2ETc%{QK8sc0ynL9@x(df`azCz)^WUj_Rlx=-ntVlC)rn0pICo zkm#?mQawEs#jK{LXWXSetW+CIoiMk)N9cm95>D_MHMHHP*6ru@~%Ak972@x8EcU^a+qA zQ~Ij=&Zop!9oWg%tLN0u^225hffK57z|?$BVmvYU`VWzX$Npbcm77ihgFZQ;8+Nw> zPcz0d2X_QO*LuKpH+8Z|>t3^stjx?pKn2yi-v7rmO+~)=?k$|En-RlX5wgLMK0|ci zkzF;5%(*n*_XvgXWH;w@t@ryU40Ae6SHX8dK~A67Ez-pwyf2Dn^7|yge8LY|^Z;=) zPva23c!5~)VE7u6vG+q~WrGP6v`N8S^HFj8xNH9y75;q7{BN+<-P{}S`c`aGDmi)d z1zUE+K*(k<2(~Ryv?IAlt2)6Mi3E{$oKnYby(Ev%_S{BWrt$-*c4NDrJ%{on0mk*D zQuEd8nRJnmF&+lNTI}NXsd~Wg!vX7TS^oWhZDxo-xiJIvsc*R?i0=`Ts0e%n@R3gd zV4)sS3wkmJkyy})XzL5^$_{IVM2PCcgL}~!wrME4*xi(HOH0dae0&@SWpZtZg9UGE zLQle0D$^?1yz4%r{^#6kL;n=vBpJ_tXYvPU2*d^ULhoj%c> zBs3U`kzTnYC9o1Sq}{N*{5s4qe&$y4^*pc_clk&%Te<+opzc;qu`O?=OVOnU-|-~kSn;!wL! 
z>D;?4U-Dl?(2oW=`IJa?(UaOHZDmwmBVIK_j(VNIO@|iMVBW@M+s)4lITD>NdH{sw z2hu%_C7goIt2b6M2Jq(=_!ePdB|umj29~Sruu`** zUZg{zV+dqpb{+1n{&fk_bP9J4(HP$b$|n(HH1QMss2sQ zqQ?k#x^~K6zc1erzD^+`EKe4Q6ut8>D#bAfc(VORH@R;8rBTp_zBSW&`vC?O#9xw74^aY$mh0WPtkhJ~qRK7iOE60r0wK91q8TMBpbd|ec#aiYcyf*( zUg+#uK3J4#Oqgw^;S}PYbI{n>c>3oim(%47#F&*|3t2rVsa5;t%^)3j*!(vBxq)YYO-9j`+)!h$ z5Buvcx~s;fLA%}tU3&IcK+IqAN0V-bil(@0Xir zihA;9Y?EDPN_$EdH{4qafSy6J$dSx((wxC+s3v@AiofVN=bx$0M;gDq_e9!g)Q;u> z14XyUS8_YJJ|7M`pdf#>EQk8(vK{}6lg0%KX6^lF<1+Pfm`bU(G+_w1qJhFdlzN~8 z$i{Wf-2^}u=8@op!Z%^HP@5cKHKAQ>g{k%_xb6WC_N%o% zM@$~uT%bN#@Pb>_nU!;gJQL#qst94TZwE!p3zZBDnB-pHPUK}}*r>{Xefyr`(>0Im z*W))2oz9@caw$GS|NVnkCIaFT!H_rs{djivM||E)a!T!_(N!P?CB{q>oBxvt2s0N+MwcL-YQ5)EIImeG~b*$M;woQQ7i1! zKqA;g(YO{5weSWT$%?+vcr&BALsk65b;6q+1KBUvxA$E>%^IZu`}%5{m8Z3`$;$bh zH)5pv^S7h*)CxucBaSul_Kd8oG0oXX%h!UH^FcI2dXP*J`c}Ay>**{qW6g2f?Gqo( zz4QKYmcnIKSN{vXD0tEOg{;ltp5Z$j2kr#Io~xr6<$-soiNSU>)|)9l^$pB-2+5r( z|5`L^vIbtxDc6C&aJ{!m-j~r~VWP_pNgGai;R@)^P zBqk-0_IISWj1^Bc$sdWc8~Ji3!rzr{wil8gEV0wtMdma~!SXso1zh_|Zs_b2ls5H0 zcev-;f!GV=b+8Un*)HO&v0nsm9WhAHqL-nb(98yo<+)r42N?L$za|zmP;*4g_?gy9$C0Y8oms9A*!AZl3 zxN0sEqrjCB6Z@+x%B_`V(#v&i1_zd&WXBj(29Qx44H5RvLnaVNve1*M)v;oXCOD0R zOAH7PQZB74vG|dUKivF|_D+1n&EqVuQRI6y z#F9L{Mc4vzyk<7uL!zzPX<40nsDXL%B58TaNg`sF6bNwINuw%mlWXQskj`~rC_M0G z_WgC~!ULZwj=eZ{wD%O;Bbl77Y+4sS;nzMBmc(1P`QMyoWnSB})X+AZm+~X6ZqX?& z>~8%}_WrOB%nj4DKizA+>84=^ds>fGVtc9`G|6qDG4CelUda^yP}Qd3#9q_yws${B zoP}(#$9(4$Y$m=M${m@6eX+@|E3Fs8N<0=R(;kbo+}$tgJ&^i)J(K{Z?hfYZ&k@F6%TFN|5N1Gvv zLCpE*_Y20CpYMN_clT;dEv(i}cgW;15KY$_?%=xEN40#>=60_k|1Nt0luzD081O}& zV!P{k-EQf%)~zKM2V*-JwZ6CxyD7sad22WQHV$3X7d}6>uiG0>^<&c?u^$hQt#_qhO7NX~(r=`e%*D}lK zwdlGZZ}ye_e6j?0aBypxoUq(mWqoflh+9z?igEp5ruPc6;3^qR&&bZN(W?4?IJ)jY zsNX+skL*qM2xZ@uy~)T*G-Pu|WjnI>-kZyeBHt8FS=oxSSLMtzL(Y~xe$V~>^@nwz z&-1+BuX(8!Nzw$QyVB_9^5k+QRuED@#jni6M|nQRY*WoIw6w)bvqh%XP(ZPkV0i=r z(h);O9d}*Vfizkag^zL>ED@T%1EZbn`AQ4h16j~!O*l7|63~9s@=%r-2+A(C91Pdl zAT%K&?2Lkqi1Yq~CIEANqT0r^w@-Q6?z7J(8AdM`1=NyGVY*N0X1i25E+lX22x?A6 
z)yE!9G>Ak;`xe^F0L}Xd77u=Z+PWiE&pgW7kdb3sO4<_ijBp^!2h6 z@lk(2760p+_Zt;)G(Bj$M6p>AQspN9h|iq8m8mc}eCW{RtpwQcRy%*+D~WZuIXk%b zf_rSMX0K9(axJPYv6v`Wx^QUd-Zj|vnOkrcnrWZdMTN4^l`ukT$vGyBitF8Df!X7g zb`ugMf=fAo==zTDWbMxywY{2V3VMiJDQkQzon!rmQ9K0dT_gC7ZUp%olgnm0Az21w ztUbAL*nux91KkQgTl~E#6vg|)KNs-tD7jL3?Eo3rS7|A zf%-*?a^tOm$2y?)!@jocKZp2fu(M`c2UXuN=qRa)=NkUIXQQNv5Bk z-y8APZ{Vo_nDI@7a&W-If#wpKL(t6955l=)(!nz+fzFtzvS@T z7k4)8&tkg7L57(R3;@>U47dQF+JEe>U($n*ZH2qNt3b6@_6s05fGu`KcRu0OG*(ye zAG$Y*Zg|=jbczCZ2A50U0GRXn&0T7vvzvCiC|uit`*rSU1{_HIxLgEqEu#_vC1=jI z@&m*$v$&Xb@2?K<15N0cErcj;#99GE*NvlLiY*k_4%jLmzKzIs1*Rl7LlQGevP9h5lN{lRx?iJz2f1 z2=s^C{vdNTLMfWofqc41Q@OZH6bvJUee5SdR-#|*bO}ToS)TdA8IjlmU^?k@-WHKR z0ca|N%Miy3EiO6Hl|>+VWt*u7HWMvyx6*O=Q)T7LdX{d*iteA^ z2oFgZ342Z2>Tt5V*qIa2DFb!r&w9p9A@2wdV*{0=9p}<$cbs~P`eh#lW_C5#+HF;~ z7erPGppnc$=2#;(_%rw~&E3`(a9*W@v%EKDNJg#o@qecA8-1cd5+|oXD9VOy6%W!r z3B;vkphH65euhmN%;5A=#9ynzn|o5Z-PgL2VC|c_)F)YOZBN?=IjrRMRTRf-Ai4QnUWj88i$4l?vc!lD>^Um{Q7fa1 z8O53hD2D(ax?M}57zpQ3O>GQo&4?Ou;}n#FZX$d1lsP4hRH(@F)a5d`?!Y zJVqN}ED)vLmH8v5vOZ7zmqgM}!EX;6Im(*Z_WFh4#?tFtdT=C$EOOFmtx>a(7Hqd7tG!x0hmr6<`}#8b9E5qevu?TnN>kIF5gBeJ`hO5HYbW~VwR3cgmc zFL=&nwu#!$A)97)H@6AI?Gld32r&45Dx2e*h|QI;RY2`DIFIoS*y#UpbiBb+28FJt z8Kkl~HBvtVq?p4xzlNnay_?hW%)&*-H&qgPBNrzW zWKdob4C5svyqabazRZP?DwY@)B*mF@yma)&oXJM!n;&H>VVs=@Mu(ziY3++nMyIfm zqFV)f_e3YsCKs=xQ8ZIWOmFSSbMav1{33U+ReR9v2%u&Z5L>LUMB`51sf;#d{7im9;t+x^A`MWgwwy3$R3-SttrNBEMKv)-1ru2K+igK;(t)$yE}o6yKBA;FA*Dq%^w$wWLqJ zp#lP4KDBwR_OJH&^{v;#gTQNDyXm0G>a&|~Y}1DZ0fhFw2W2Q}@UK9Dahoekhnjyx znESDtm^o3qS!u%ahw5a;%)H$X{yR!@1xE42uWY#75{Kvdra95Bu#|lt5pVCK+X;h1 zFpB*$WS^$y?_R&TEgVv`rYFb;2@Yr)z<{w->cee`b*lc^0=9MW^~Cr#5oX7F=U@Ll z##&{p9)O$C=vhv5(#!P+ZElYByLv5#8a;wX#(x}nf78Hfra|*B_PmU$NYYll#(@e} z%nG1vSEu@$5YB_S4BiAEraPu_vmmilZR6hVB~*`c=0$H!?$X{=JO+Fb1(0pFsv(MA z5EN_(1E%AK=9*i14nkc0V}`stx7S#%gf@492#Vnp_cr>HVoQKCL&~wvQwd-)W-i=8 zgieJ%IbS_(8wKIXI}bb+~*bHR^c^+Y2ngZ*HGu3KKv{w}Wf4-2T> zif3~tyR|g6{CRYOR~+dh0DYjR8=g&~o~kl~ZjPuq={BT^eG0w4N(VeO>DN#G+}((L 
ze~fb~F6VXN2}=xeJp%{)XRlrgn0vO&=Y1L+LZe~zWt6VBrWTj#ga|Ptb&?rl=*e6i znw5ajHQ{cb4BaDe8dKH(iIyL$4z^bFbZ{%Dbd7NDt_LOWo11!366SS|1|kDVcY)!Z zsy+d!F6#}I#sKt|-K9_LMB2~Ej6PTh{H9T~e3PoL6@mKQK>BT>-N9b|6&I4Df>1?l z?;f_h?F4#i`3Ne*-yJWZuI<<R=bH)c-GRaPdAe~&SdA#f z%$+AytXa&rm=5dx=8^}86fwBQcnEL06meP;=p4Cg#(#rao(Lj8_Ii~CxvAFVwJHi_ zKs;f{w&(pu89)HSYpFTy^yCY*$c$TT@=vPzVNiYz%{*|oeZy^!Au=zp~6eQ#)-9|B=xBM^;@O*cxm4Fk*aEnjh5!7r`?=wT@w zXI;)`B16HowYG58&{wIk=eUHmJnSD%zB)2Q8yL$-r(a>ES2X$NXunZga-GvSYa=Qh zK-P!Ovm)Z+d{nUC5&Bi13weRDnJIx^zyP&?pa`+f> zy3{Vj@Vq%%bYO71&xNOt2YP_B?E-}aLtVB~%Qy6QoH4x0)hhOKCDSN^eyV%Y;EBva zAa2?^7?j=EI}^RMRaNN$DDa;gQ8T-1LS<$Mww7xYQ4K0O=MTnH0;8e;XRk2z3E87TvAD-@H;LgmUtX8gx7mpSk^u7Z}vvdGY?rQ2Q}l4 z%WnL<8xb%$($7@4qKDnf#YH|u;t|JGjy-f-7cPc6rF`mT%X5t^T9^g;g5H8`VOCj5 z&wi&HH*^7f?jkQ;ZfN*A1<{gu8h-YW(LXhOt>aq`j!DcvU`ml)BFkU8qq6q>n_d{+ z#0>ti$e8I*u6c6qT@OHn^vK@^uQ5Z{mPtLw$j|tK0Pt?i0espZCq}_Lf$;Mxu(v|| zwG~dm|D_3cz)6n^n&hc7%AEqF#ypJ4#BE#H}$caL}mvz18JX~?Ljeg}ZD~^5l0n2jk?>!V$PlmR#q@=;&TV~AJh4-R+ z{icMl;_+^sI>vBSltI0ULahg2xT%-rmkWV6y0gOcH}vJOBoc(IaP=_g7Jm33`UBTr z+XeC(YcQB@y;f7*rsYr%BiW+adU5hBzGL1G{G0=&4;dYCFg3c4rW(!}iZqrhc|8Vn zASx72?lOnt!)TvF{Dd0UWU=_Yijh6hw0xAVjgvyW=4S$({wGZ}n`yS|F+Y@c>6)Ub zV)S>FYcBr2n`Ot|yl5q|wTqOB`vWdPiLBx`(wqthfR~qQzjcHHE4-uYYfobSQkD~^ z5sXF6Em;}@j@qF{8I%%M8Pg>CfP4L2%K<`kU4>}8&Im%*Y(r}W8D(d)So6^fQaTAm zQvI`|IIvt4c>$1{{t&9q$(_-ZvWp5On7;s&CaeGaDYcFMb8!iJs7_=?D>YX&WjK3&8F&oQu4t0(lFC<)T8p-M{E{YddYXaykI^&sV zk%i1s=A)t^PUIb4Xs=M=m* zVk7eJ8-{62Qc#9CiIj10{s)ULWyqr*#l?T(D7P&TZkyvn(0xZGT#V?xHNemXXFWxu zS&&a@WfM#)B=MEW8o(C&Ok1*Wl&IZ~NRWWnP+F+ple&Oq64m69d?C1duh8~yXJ>4> zuA{_XFd`BT;-fD4J#v%qPaivvt3A2B^K6Bar%WB0WV+Gu)*n2MLW2ZPewOGgiY`iw zq^)J>8#i5Foo`LO6|POCd5EHcg)amXGPVF&MsG-Ox7;k)=gSr8@WKgs+rtCZ$PJN&EnzTVNSDL{E(8&4(g%W-yxQirN(oRg^Gqg+5tjJ%1 z8O9x93>AiE=f7(%6v=?h?I<0Zt=s#A`C-0^oe-45LTE(D$)6f%kOv9R)8HN_d3SQr zgPLnxAH574OTLV|Tf0f8jb*_otWU#tEC*O1Jf-uZrH5(MyV@379;rD#gha2fOy|On zn%b=wc8p#`24W*E8m)>Hj!907ofnHdC4yq;LWB-@!9^ggO5$HEaK_zy&zG}VvK_It 
z3aY5d9#m6oF%}lW3+!@nf_|xBj+qx=z}N;3hMy|yZFb1Vr-(nFzkGb2kPDu*|skzQGAZ4Xo1pTK1P6@n5PH-Muv zhao@OWQ)k+wAuuNw*KA*GHEOUq zo~X{eC8?iH|VZNFf`7kG(WGcqJ(1v2c&&tSv7}_wZg&)8n-xzy#K`HuEn?3)P zhXmV0^^P0THxS!3+GOGe^|X=?xJh;hAb5e%PQsdMi%O*iYUNAXih%KPsbL7f$MCvR zLyMNQZ{;yt5-Nr3ulJQ$ALD!*^67d+CQKwQUvXXKFkxsr;IY5{?tQ=<=otDwl!h}# z*&rx!Vo>-Xd2)V#KzV`bpffF<>=_4=39?dGce1dL#vy9C$KVu(jjQ0)fRW0(z7 zEK&S`TeV^Tm@ar5z(4f|@2KJ0MO16P00m=wiH8709;ieHMP&6;KL)R~t!XdsqKEf* zj#SKg@n|ycq36_GV^}+HPUim&2@Zd z(d6|65LykXKmaX?&dB}H5%(+br?EEit4RkO?DnA}-UYyfwflWJI5LKwX}zKiIlTCQq|k&Sv~pmoQVz8eO49SdHkPd5ioAs zys|1yP=$eOJvH0lYXNZLq<^sG#v<~7N0}E1ispr8Ra!K&qtQFMnS#WDEJlw|235>p$XV5H_x0hNaWAr}!DuMl7-dhTlbpYGvqOvjoR#Cc*`PRVaQ;Hr7 z9q*7fN7$*~^TwZn9uT)5nFs33Z2XJcg#eqK&B(nAAO=YXv$;p1S3L9dI&A+8=p7OZ zwVR_4UV>#91{#k#=i}pVP6~e9=TCfrQ2~bGl9&i6fJk(JzOQN+V944aIxEvg&rWs{ zCp+msPz?O{m&v7%Vx-onSow)tHL6%VVs+kOs$L<5+7m;}#X}~Oa zfgE}lt-L}IT?eaywiuj%xGhq0b^r`@+W$SRc5}FS*E!9UMf)kIN=OhuEd-xP|LQvT z2O0nGgP*9!>A-;Gv3BRbNADjT72>SA~Hz+qP7isr&q?YUe62-;!u zMK`tbiH&(NS-&A@D0hhf$LCneP)H^7@G?LH$X`Aq#K`?Sxn!4da|uutya0b$`jGU@ z;8C~XgWp$vjY0s5Xvj8t9ZXCtx3DAT8VC@_q24s|6Z|w2Y$D~p^s_u*3(y|M;lJH= z%WScW^QY3q%fKF|f&?f$LW1|#l7pPjJfPLpT-C{HNe=DZi{Boo0J6O^=jUio8KyK6 zDX}#K#9}i(HS?lY1*vaUkPRGWOZ);kr$4we&Z9b8iR8$r#Jg-_Iy^Eig5D)s5MDo zLIw)-jXH!d6$xP$fs}*Omj7=<*)yA&y}FWhzR8+%}-OvB6@%_V}zxJ9i->r6YnsJNBpigFtsYum>VHYI9h=7TH^8@AmS}+ zj+=Ka$0xU4mD@*s_kukw_a^(t8{9EbmzWc^7ooyX5G8v4*~Am1ZuP*``h_OU$_)!| zep3;}MshWPGTS_^W)KXqCTDcEH*tywhQ)U{6ID7+i#7dtWf(Bvc3$hiY+L7pDg?*7 zuyAV*pdBzlMOhml%;4+Ry5GDOuKU2G=dKb!)1(xjzmPb6W71Rk8j;t4TZr-zFpmp= z@Xo}GCW!5;o&@w+ecW~6QI%CDt!H->2Oc}noSeE0mf zgVte>#%0pJ?M)u%-&Kg@j@9QV9d)e-THm#(U^BQ(_L+75(9~tyEJ^;O$2P@7Sm7jPtU(TJ96A!1y9kdcBP*Kbq-lp;A^5HNq|43ab|c?4X&3LuiAl} zLLt-MIC2W28&#)};d%vV7|N>wiY=@b`zJ(mZh8~S2B{;}ENf~0;5Qj8f26sS@f;;P zOg^P6NTWsP4g@Img&PD2X?KS=gBI&Vu;o$%Zh#T?nZxkegq=6#B+7mH0wv9)7{Xm{ zc@ulFm<{?s{(CbxT2NM8?*X(h^_#=xqJ~7hIx+2BD(X$(G5~>B!*v*9;wBwO+wxix z$6X$C76|2B^7-F7ZqLwBf1m(%yB3N#4;t4MYQj~-Ygu`#0F{#M2$@o%Q`B&H8{yys 
zH7(>t1opnR-Jx_Z&qsDPQ(Pu#iKZLvmD|a{HF3CmZk241b`nY(WBdIAmCnDF@9tv1xyAd>P9d!}5k{+F@+GJ+4lInzTxS z9b*VxzQ8=@V4u6LNoV=7gpL}Gz0eE#*nOGDz- z-A#=Pf8*kvvbP*&Mc!1yas}kfGsR|B<6?W0xBI1Sr>vf(y@wTY03mj8p9t`_g*hOjR7$sJ;(<_`CY~O z1MBkhfA~cpm*MO7uvs>T`?opS z00Ek&5{CwW38r(VRi6;0E1h`9%apoj)%%01R;}_L2qY^X6`%ZMKH2}|KhwU$W_{S* z%Zl&zr&Hdw{u{k8X2BfX3~>gAA*vG%0?6Kx%6`F#_+Qu)5*Ii`?_9YR zz97_*6Mcf_KdY!!0O5K!zdiYTYXVxAIqsCK!eOm+98)!P^&EB6AQXOQ*L`7Rt<9Sn z7G6%xtH~RGvlNV-9)JGe0B!nvzeV{uDERyO{vK|B+tG4sU!2#3;Ne{#>T0T)ULNUy zNvw*aOpi@!AZ`W*%V3Xo-gjnOV0&8$rx@OUrohcz;Zd}SiexeNgQ@2^iGRd?Iw!mdqdteI-MH-vH^7nfk z7(GqZ0JRzI69QAw15y)yIcj-OOz+~#7d&6e)!Rb~U191;DmhR7Ug1Yl?W zPku&RKdjz)quW2&0<*~O6eORz61g*0ydvWF8L^;ui9T=#bS0K9Om)(h1QMv`fyX|Ki~ex%E3;6p}4eq|>1w(VSU z6+4KZM$DNbQ_{X>B2iypXE!`(#sLzalv@GR4t4q>Rz1;+(|u7JK8kQ?@zmWxa2AN2 zlQ}#aB~Gu?TZXuKVVbsWyJM#Q?S}Y$;J5tac-V$xF>4P5O(wt{$n7$uyv_o9y5u)I zZ~=4Eirs4_*zvvi_M>${u3}(z@O5I*=1BN!-m0B|#!$sq{SR}Fv)!MPrE|2gL!5Vq zMz?uL@y&k7L|ugr%dMfCCDG16Zr8BY04A!LYuE?=M50Y+!dj>0MZYiq+6>-vYUJ2K zZ7ZPslv^CDVeaS#{!ImM=$a;pLcjM_CAK1>OX|1NA4f>c81uQj-w}+w3Eb<@=P1Jr z)Deq%maDUQOYoDl!J#kO*ntR(uIhh4>iq`0H`->Kco?{`a6z*>`5NLzX7w@JwhK!A?tdJo1rGW8?Ta&l z#VIiswosW~r!t3_ad=6!HK>fOr*VA+?3|HeuPzL%+Skl=bC`a22jAWv94cr({ssa9_lxCKyYAIpYqLdAPuV|k1bmepOxEN7J<6nCQ_#YPc3X7$dgxBH~$^U z)#1{Xb^o~2oi9pQ5r?U(%Qmw0(i*yQ7>Tk5$v$eeIUE3b%a)YoTSf5BAu>)CDpo!D z3N~u5P%dzt|C7gmKvrHe7Dte#a}urAv8YJH{&6EBU+RqZ%B45V;MPizwm3!M{6>;t zfmhqbLT6;;*H?RL+M*V(8cq1<7GDTau+L%@lFS;nHPx1<_&8=Q(xP8W6lr!2?E?Kz z&+k}_3CD4j27<3~-Xw3qv7~!;hdX5LwES7GOs~Q^kUP@yslfOf+1PU3-ZX)+UfQ<+ z7tDw7MKf^E4)VMQJB#uQp)au=;_mNCg+}EHZ%o4_ax% zqcsWVoop9vn!UnPSyUb8>|jLEpMTQK^6Tq)$1`SWVzU(|DVvfO=Kn1?7SO>B&zdWt z#vRDmGrv^ffz||?h_z!tDbcdG^hmikZ7#2SCj=e?aT~i~K>b0{uHh}O zc^CtH_8;5qpwgnby+Q!lmiiq(qsuQ&@SAT5$$2xDXZScqgJe)@1ng0~wz%L3cO%*0+06Jw z@uiqZxsGOyfvY5s3`Qr-x|(jhUI#W!RSzZ~&ue;v%|z=dQn#@-2EW1^B-#N}W^W?% zo5U|tf2vsm7}BifmJ~{DCv`2zI%w~QO`@>wiaPk)xd<0;kI1m?WZDMWM&^%w4K8JX z`x7utbb7wvzVN5ZT73IS8F<`W+1oNaiZ%EeB~Gc_1lJ+i60xq85&16?b%nqC9?o^0 
z4QWpC{Qm(7zElZr8-PmonZV?cnT66iRPzEH{5!-K@l$LjGVwr9L`8&&t&ZJ4rx9ieEH+K(W&fh@D-L%C0n|=YQYZPIqk_*nS@JaMc zT6w}xr(^M4s5WteYWo`i#;Qd1Y$NiulW#I*Mia?Y>^lGj)q!BOaAL76qMBLhjk0cN z2&LUm8%thf_ZthWIwP;j-; z`jET^#4^n=o>bc0&?u*k^*{L21d4VBLIQ!X?jt&@!`Pt%r&5zvB-i;9D{##`l>F@<~=ID~gal`ah-`J?+nddTNt!`uGF> z3omiDbv%HKHDFM&xH6#~7>`WebV_9cFum2;xZ35B+4ojh)H z^l$&j%0_hf9Ed}eAm@-mMicF`7w`2%S{T+pgnb@>+f*)?odzacoq<7c+MQR^&e;gJ zxJ|y(WjHVPb=hC(tF1iw@A1b!K#}Rt-o5)t>MN(Hb=z#Y|0Td)XD?0Xf6)EXEp?V7XZ?6HsH{)LZRt2~bu&*Jz+ zZFW*u$-*|$5MeEQkt{@j`Om!==Vxq=BITC5oX5pyTq7VV>Fzeck)fS48|&eR;pIR( zT<-y%xVpfoYeMd50Age70AtsL)5V1_2pt0FWTRP~K6ssL?;#=oy7I6 zJk)eVr7X^}1yX5`!ZQ^(G#Pe8gMk3Yer*iEUeBDoZTGZ}a6wa@yt0{OUvTZ>k4Rdt zfTn&MC`IZptltr=wLsbw?_EpK<`Lb-4hfL^_dstuf8VmhaT(2*~gqLXk-Sv-%2{E6w|>uf~+ z)kYo=TGKEr*`_P%D-c3XNk^=*GXC&ZKbHUdp==hgzHC3!JNNW_XuT=kZ2w&gr>+7El(kH`COp)YsDz zhx2G$Tb90JiJsT#cxP0k@onMXPtyha{Ycg+6%HcQbz5r8nd~*3aC;EaSLB{3 zoz|9lf{(Rzwl}$T^0o-*UCnIn{2Qz95u&2!V>*EYLilCf>phBc^BQmN;KQ-3PPL5{ zg@wQ)-LSM71f>>A0;Let7v?@i6QV;5N$pfo?RY9H*Ml=gh67rTrH?1sMG}ObQamD= z=08)vtke4v1eUhnn61m0FNqsx)L;f);l?_i_0{~S`55=qA3Ut0BsT)HmF0rE5fS=l z;7Q+-kyeb49PgB)%_l_3!D0ZnR-!`xlpYi9mj7zuDGyNGmsw}1deT$%bmIxRhmNK; z0a5%EIDQv#o*1uyU z63Mxrd$(YRl|?-&ChTt!I^N}VQx@Rb|zEJd=PtAS)}~6Xs&c4X-F6~XSeK+;PP?!Zp*s9 z#9$QznA2FDi^&C#kTJgTx0n!FZezrkVs@PDK7MfdB{@DQrivZ(_5jb% zvIUOgQWf3uz*&KC#gO>V(3&t4eG)IUlju86A>Ei~2eUyH676HIjScZd+17ra(Yag% z!}t4Q>v{7_6e|7>sJ{Loq_N9!b{7LP7Vur<@N<80;7q!{765XOBR_4bv*Uv8p4pHd zN7>L6GjWYB>XLjfp_vvt`}nzKzaJ2VvRomoXL*lq1uk0A#bM8>nZ=&N!lU~Z=HIhh zXa@@*@Ey;WIGQFm7T&B+o&mvS#Jy6{E(Kj_4T1o2tU&`U?R`9^b0(5$f?_sK#P^0Y z*Y(7I!HtxE`d}ptead;asA@)nL&YEFHf*sf{ux-MKioR&a8z}&E4hb2z8IkwJCV)% zg+6CnXD>GZ|H1U&U7En_+qGr_c;9=}Nex)?8EKFcG)rnB{!MRgz*w$cm9$G2G_hTJ z0vhm${Zhu*E7c6rVy5`JfNff|fVH|Cv?Kw4q$x;AJ?kzbuyz)l=rixxJcI<` zr0_XZBN`74nkifMqBwQ8)XX6gx!;A$_U@m{2OQBDSRn0jXHWq>| zE;apao=3td)PJ(aT~M*d*+xQD%8*E6L&+VqCPH^jUv*E7d?9#`!Yr{>od`h`z1S%H z((ig*75{T$3D4(WMis@bWX`fnar{`B2fX-XaKS>r(bfh6EoBoW_1`Y}N``O-w9`8^ 
zYC8|XjNHA%*P0W1jMW1??c`QHYy(vUL~Sh;7JuXUGDo&zbg4eJy7>g z*N?}Pk4(LHFP#57DeZC)jZ@_?5nMd9m>dsh3NcWhF1>g(GkmXe%5 z^_&&~M{09DA?FF`koQC4PoI;uSG4hGETn2;lC8=9I%zQ;8Xhec-l4wKi_4(&m5QW) zVSi$k_nWD;=t$~U6(Lr;f>&}O63=P+0g1K18~?bYbjnA{6F1o)8N|cm^(w$akn^=& zSUzM~ZC-F-SSPTngkI%`iZ2EyJ_2O%%j;Z0zYxW5_9=2bSK6Vx_QHfN8DHODnOT7= zYa3J;^<{49k?|(5!>IOe=Z{{kJ{5A4vy(s(Ip!c3xErXIsyN%vSRlVY7O|?~)#mUvPnklH+ zI?IFdV>}GISq4KZ_o2U}^Epm&a7`}WQNA`aDE(76%nMY;VMB*6fg!|DmX*LVH$3hn z6AyC(>JZJ)fv3h-Me6c`0Pma6remFslpe*_O~TPs#eE%i*oaIvb;N_94DPcItt3|4 zwVN@4x(}g!k1pf{Bai4TXJ^ETd?sWToj8+wH~+(_upDaaJ4%JW?JB=JYfHQ#F%#}B zoX`mk(w?vGFNrmclvG4tggj*3B659C%KgRYM)r%~A5@d~UvG_TnxPf}VPI=qKw9(r#oZ z`UADo0ZPDGnt{x#6&$7ZB;qk#;j;*fSF()>VK*UUkkw#83I}rdzkb4~YWHB)AF^k` zPIpNH{-YF@^xIuPo0^8$-rz@ut(ETS#pHT8k)#-Mq~9l|zQ03+&keosm z<(Xu~^WG9R_QI~n*$*~sxU0V{KI^-S$0vHw-E6J4sYe{ZJ7L5yhf!Vr z=k3|21VJ&qq~2kj{5LwiX`E7m;E15fxnvc!u{FC|nCi)YoRz+GPIfN`J^u z{e(-O4zbU*E4I^1D=aSITVbo~gk#ZT$eMFD+Ju`CrexyXj(ihbYZ@f4AS!e%YP)P+ zF`LV?L8IC)hzi5hMk%uhNaT+}@O=^ck;GI%pP#m*0+iTOqUN#&;cqoXeTNZTydCFK z>4Zca461oL*ZN-;SU~!gDwjDyYY}1;;N+f}Z@(16zVP9b?x)!(xzEsP>M9f@)fllA zHocqjy&oUyHf!zpWnjd+JH+XUx-n7>^X@uwZA5hgPSGqjl|~+8KtkBW8a7$)YJQ$l zJ7$K>rn(Xx{6|(=hocVy|S(ltI2n-9Eu#$=k3pnBG zGfdq{#Ci?N>XwjfiMpCW&^?}e{>C}-=v>f|?-< z=ZBZb`?o%JW2k%WbxDCRMiZU0%yD_#^|~kHRn*LlX$zM}B(I=o-m$@NA!;Lbyf4UH zH5%Pb!0_v<=A^}P@CAVSW#1F`c&b}mH2yd&g;EaG*9;PdoL}G0Ox#oEg2NpxVNb|a zL!_W|CD#AiwxXy^BBY9&N}cZ=jE2`1nuyl*7D*Goz9EG|a;Z>)N-w&&Wn? 
z?>_UBj{&P7z$;h@>io4)acovyQw`qa5hOrD&2q6enEpohNqCZ^N)WRFb!=Wx{$pJSFT6$zM-aYx$akgXIxmyLR zr2=7wORKVm(UbQ_lOe5FyHZ`vH{}aP{Gv1j%#SkzWS>H$POCKGqT}H= zxrCp***iu!%>!WMHPF2XTv*YI5ed_m*b`QS< zxi1DRy={d(q>hvb$D<&KBflf~Q4_Ez*l!LU)LVD)hf()*e#;F6w2PS`;C^Q6?1@3E zL1fQ)cc)3+JzPO+O+EGvdgq6lS~V$TO2(M2NNJ=nj8>ip578aQPGHZ9O3)~52cokt z-++VD9$yt6vgYL!2Ec2g>HF>qyWc=v?U(61llN;vtzR}QIe4HBaqm!WxtdGC_|Wsxz*^7p$@m(MAY1m#WOq;qmny*oMB zwF49)UAWYz{!uH(SLI^7`-B(?jNoVt;}4hm6aUuP?&0C@GV#L;1ADEp!9Rj#uFeJP z96~NWBbM6#MbIldP0J$PN-NTDXDRQR>H&Uls924?v`wEkN?6|6J(!Z*D_RX6?|7j{ z>e|K)*T^r9pG#Dk%+&~`JooOtE}N+szxKw5u{0~$R7(oqYy&$C+CAtrB(0Q|FHpyX zQI1&46rI*zEoIUka=9@1hO!UQq2oytUT4KaNbO>T-0(c0ND+dZEv@iE$tHdink9Nl zN+qj7@~%cHPJjpKd9S-Ik7{q`9R_vd@O{nRFKfUn5--PnputyR_gPh25n+p}U#MUb zzvI{7p|y0_0`7!Q{8}KcL>{gg=pQ@#eL<8`Uu1<8c$%WBtSU)9zw@G#t_Wsi6beV+zyEd!8D%&mQ9AQ{|XdloDOsa%il`G9v2z&TH7CvO;V2ILHh}!%3)I zr@ZM|!@zh2W|lvD$>8LBfoHbr6ABNe*JN~*sO`M=l2OC@<&+7s8DqHV?vi&@_E;z3l-U%6-YW*ZHGBrCC6k25v+E8P)J- zhV>hfYzdhm*ba@_iF`^3V;8xWa=T-FJBeA)N_I7kv_Ggidw_T8E2iaJOZAjsLDB{o z2z)4*MQbpXCm;~)9cCzJgbcbnF5Fa`hPW=Y=Wj#TRl_51;1q=uUR&fI9qP ziJjf91>jS2uDn6d#~PPJ2axtndBg5fJR#gfzW`dH5){MFz$EyjbMy#tdrS)T{V{8o z;R?`}-E%eHq{!@6u&46)2;}&c(XN-_mYz zPS|bRW}$`o7@u~JU?N7wBQwHYp54@86;=1Q-i)J9-9TJ{3~1UuZ~xL&=OD>tC{)Bi zc!f&26@^m%B8N*8{OYYgkwEnzrw>jm90+{i-^+u47z(KJGX>2MIpf|^x4}FqW>ft% ze0+MCXAkdybos|}*HB=Z9PANf<(>chR-8cDxmS)fA~lovUwMQoWy;8@2@AdmESVoK z%6p~2qobEpAI4rpXz1($+g<}X7A7Q&9Sr4*mjFD)B{pyY-P(SFnQssRh)qe& z=_9)fsv`A$P{F*f{^5!##lK-8nYyz=LF6t%1Gjb_B1OZ%|L@UTSHXw$+;`+}>x$*| zx0KjO41&bZkO|)j-||Rr9g>-}=&-^C*BsM#|5bnKxJS| z+|`a97>Q`>##6x1WRSt31o>kMawO*^5-A{yWtxO!?|uGD_BjTnqUajBH%c8H%5CPK!`C|Rz3V!sWQ~e5 zd@zTm98~QpQ3!%waUB=}7>Da`1MLrLsI<+;vv(|%E;hT)^|q?1+_N1G@dk+D$wU=2 z&DEm$4Vsh^WlXA`Qc$k)IULgX8x9>BiwVf5vwqc#NX(?7-&?>OMIvN-ak*n5z>ACG z++|wU{!WT@oxF&+IE-hnt$?z%Ja|+FX!)^_iCEU6Uxh11AtbZ}B~V?frnW8TGlAeo zbo8Q&758(sj=Vy3lyztuY5BehH=3L}Yg)+!H}-wwo`>8qXyLb*KSqQSTXEldx2r(D4O(W1H>G zSLJ0wfdvi!5($wp3z-W6>lVX5|Ex628C{Fg7eXC&p(hZ8qWwY}&oP)uohCj*CG%|D 
zw;9wVlOgrx>MSChHOg2q7Cn!BB-^@b^Gcu6a73F~=* zuW+XRm)WP=V*uvg#w|1yyMNlEp}NcGyCYpr69YsSLhFs|+e!h4HF)r8SC_bplSIvp z7@Wy9~&J8Z!5ilknKa~LLzJ-^*o zO{eVWm9Z_7?Y1`{*1mD{Gw&Z|XC~*OgCfZS6hCNMOuf(|N!u7_g4sg{X3v+W4Dxb@ zYOju~oWKR_FbD>Onb9N!m09d5{NxnX{prKC_m!&l`0YiXF%44FigS|7sQtCK(Y_-N zB~4w%PYzXruMd@L4xN4LEj=aNC`%N6%z^(E0pX+LNAO-uLBinz%=WCkv2t=P^<;)r zjKu-cT0DZEX)nSL>^j;)gGBtg9Fu0yxgg=@x0n}q3U9;;6Rl+?xrU!kNzm$7t>*G; z8Js%SK*!oL9sj+bkl_9_9#v)S35D$7l~;~u^o=l`!#6aOCOkl`!M6Ukai zC&*5+_-N}!+SG(VudDw)w%GfwOMGtaQzvfU$Wuh=2U#SAAITonuBpR&gK5V%^)>4d zVYL#m|HS>ki%)Wk;{n~pfuMNqN-}8@t;v*Q3AZAzQavOY$6M(s5Z_Yp`Dx^ZNy=AP-4 z;sl;qby&(-Qjt{JOHXUp4SG3}Be9ufpS_VnMwx()act0}TBrKJW%+fKq|<(9!~ACG*=mpVo|vVH5?({IO*Yz!cUoA!W+fsKpghFYur=Tg3u zQ6_49xbbD=*An%g1QCMr|Q` zo`}$YiX9VM`jfnbi1ysq4|QT9!AE~JelF*uN%T`9i{OQx~Kf4omI+ciknxi7487iW_sGTK~Z;CYEHJ;-CqJ zrrmp#g_^mKacEGdL42G>R9KjdtmlE*S}b@kz82loED~sQYHe=|AN1W`&fY+be@B&} zGS09Y_u4sRY0~+V2_KrZFD*Dru#g}L=V?mGcycziN6X>v((ejOZ-h^hl3;Zdq{*eJ zb<<&UwM(-w`d0H}2_o)vyFz2xzn%O5A2XSVG@F0IH2-AD$h!bTD)Q-5~wy#*LyE4rO_UA^u!#k=i-;Ib@GeC~J$-lkO=hNO)ufUZ4 z)FVnRj$%r}jj!ev0QtiP<6`YL@4mAU(SSy{z|7=@3Pkl60>Lh!xytmXH1^3z)a~pcqQlP zlQs}+^cCiBl=y80d!5H)@N4e3v(pe>IVjG#9u#hpXri-`5?Jd@c6XflxlpJ#Uusb` z>GuGor6X-EYBKj95hfkLz4LPM;B;Ub$MZ@eB962_G2qW0Ri#lkp2=?5BB7|i9l9~~ z6u;KeiBCo(;(5IB*ka5^&wa(4v_u=Y5GnB?k-8&nFn+m$Yh!OJ98sx;mW~sN{V$FQ zVa!ttVFr0g#UxISXJy7t2Rw5(%$M3nKk`P#Xh%E_0~&&UX;x5JOAh;L?pO9u>e#YV ziF7v9cXf;8?V1BTw0w`Y+}p+Up3cU!zxIY^j{(=AACtIS(Q9VD?h>{j@3JMyOCJz`*%2E!yi99 z`mwy%nMuOd5CdZ|&ARLK2G~PA(C)^1ffj=446u%mxzHj2)AN?wfX|>#=1tWUikl1~Aq#j98NzbRxB;$TUudcQe2Rpw1iTdCRW~ zqi%;!){M5i0g_!L#A?YF@%`DkZW$q+N{0mGNRq!no!Pzrra?k_t`bhgEO@_hGZYlTgVm&%Gs5>W; zfUk7=Y75NpF{B$kK_kTV_CAP3YHYJR4a2XqZpe``^C}r9EuF0>cx$H{+uYp+9f03c zY`t^ftsn_H6ZQF^&rr|pBM)O|#tqje8gX=QbYF)*?g4kSi?-uIaJ>q=k9KIq-g1L0 zgVjsufJefWj`vh&!%UFc)pWf>9hoG(+nIvpm$OXIMt?TxIiH|oHsH-oP7Ti z5*~AaqPS{pg<_IEi%h!p zoqq6xA@cYjl|;(sR+gs|ZhfvA%u~3Gikw2eo!$BHdF^_pm`8FS5{Y-AoTr+6avUi4 zp;(}$!gh98yLfur5AB+TXC?KC_V|zr}Su55b1-iD)Brn}P 
zy1u?I&IMV;Cp3^|q1%3gqc1vF0ap8g8l)TZvE{_IjQCE5A4=D@0fNvZHNUyK@=D_g zi9uK3G7_ep?vGJ--2;g?|M%94L*7tqVrA}{gN%SpspiNAC^lV4i{{d8JzrS8C_eiZ zL{sF_OsKk(LCMh}(V?4p>SMRdp!|sBRPCTp&7J#{Otc+QPoTkn>iNz1u`Q3Oy@l>9 zvqm2cn1;vo7=xl#Ds#IQr z9hZRR7{fS>^I_@SK}Ij&XeY7J8yFwDnz%6cJc#W;Ub;DWh@eTz*7{*rPkJ1OYCzc|{02WtvRW-2fsA1g7aGPW<$%fL z@kzEb^Kh5h07p6Pw{~V`C~MUA2p>4dLhn^z;t{`zs*$Z@W-vZIQ>%%|&llY(0u|fN z&2ygxH4R%~cQSNHK#37TsT6hmKoI((HNxTXEjs1TQ+5+wyiqkWl&Sw zj%dEIQd~?J4U4<%*>+fXP7bMjWI~nSD2y-@)t{e-!XGM-LWYBfJP=Zi-U@5Trw|6 zvdmKzT{%vwOQ$7Sh3jv-<4;bQFQI52y_0+UTekC;hg}1b4B6+Rr!jEjV*}p_!;|-G z>@AzJ81BW96AnDIB&>B!HBzn7c+B%`Wfo$K`3P@7NLO7eduycI;Ja%d#92c$PhpNI zt)C#9Ix)o-c9itTV^zw83mhh~y*Xq$m9QIi0nG?O)?35U81 zb2uaF1Y}{m6EWduoqZ}6h||l|!ISb&qEA?b{h#GL^%8R*Lx0rsg$V=#EKa*lAB?4px=S@l3J!<7{6sb$wP{?=f(hCl zN}9%1^;GjstbQwB?YtJq+#E_N-4OSeE@TMdm^mhTGnsv4hALpU{k?&NwZW9k1e1Le zCUD|nE}`O7g)J3QVz*k$5{X5BjwoD9)d0MMWpHhE&PK`P z=8whF=<@X0D&e+kRg`MJa#}3(A4At}z?MpJGw<9-E~vSH6U8Siq*Om%%B zEt}Qs?u_`svg5o_x`es=zD`k_Bab}7x{>1~bQS8)G86t}2}!TaA=FHGe1V3B#ZUAo zH#uR%Ui%U`wf~h~fyw{vs08-ua?`6%?5JlHPF8)GvYpY8>H1%Lgi)vzP{PVDmb` z3M)*Y4vnBz<63;Dl_0&gjf+k6NYp&NB#}Rx&J~PhuB*BNYkI;#7(IWPf z1`JQeqqE;J<3{i#+7(3X-i^df-{!t+qCj ze(`%U$v9p+p2!?~*ZJbG7dS1u3bV!hE?}5X7BZ68LgG6 z=XlU&m6l{PQhuHoU!8FM8|h^}f6EGAF8WnAmc=@j9=7Jk$M4^{m8rl=1XC8ynPR7j zYDe+ZyPknqp`7FV$t{;39d+~8-WaKcBP~Ie57`uVKFQQN4W1r+zA@S8?vW6^l8Sw6 ze0x|*^c($T_31miAF9;J+|g$C@^9jmOg3(~mGo9gRj-lAkRLq7M`EqI!4}D zcg7E2>E2ZPRNYi4(aOtg;>?GV^xhX|Nmu7HwWIIe+G|U~-{Qgv39wGRM9DzHO+CIZ z_c=E1-)%cD^$25cxA>0s@J~n%dfn;91Lo97dBlY`CA}|%m*)-}Sez$uj;IQFGu89R zY*+#*k=x+$cCnOv*my^dH`@P^YJCb=u%Zb`jx}23)EyUz7bv#wdN9dwdWOf#tg{#d z4%OP%zYK6RVaFPH=1t*uv-dM#Z)3@K>CX~S=!lH$JOhsy;!%?^Q%6x@&QdQK9^&43 zpau81`VWQ5AMd@0b(wYF#$SEdTFNt2!hf8D**$+{tcxOma%Jl>gHqP9^U8cZpg@G> zptT_`5#2n$h`uKtjC$d zSfsv^kRBS=^gUgwJ8dIN*L!XAiIrt5ZgJ*$zT1%TM#-gXZ#OGiZwP^fAN{IIuh6Xb zszBm-r6FRd->jdrzNUV%dxj*Z)!Xv+Ow9^pkk1CE&5w9RP4{|+H*qk}s;#Nha}Q;H 
zY5T(q&z_e<=zQ9}Qx6-*??#+oE!>npNqaqoZ)>d8j#Wro1xekoCNV&Ps}FOe_QjA*|HROF|hHga&XGo@}_@!A1R{t9d_UkWI#*Gwl;z@_V_Z`R@+0O z_xRleli$TX?Qc(*OeckNBXZcZNU7oRNrv=O zk{!BlF7r@uy_|IX1#IOPa_xrus~NZvnSYS! zT-w-$Ve#)E1#|q`b7kUuNP!DL+(4xJ`lhjQ6{vgufO&y==wTa3fHEGEU^US0)BSKm zuIud|Ckl z!h<2bEc(NG-NP0|mrufDXVQVXjiaHuACBmCsHA&0<-Ilw&|EhE=lf8$6oLFno7_?r zUB-pSC*}e4xdY67YWRfx_r7qTn^0gdqqchZJg2G!mS+1mUilxN8cYi#S1ld5|k;^aC9wz5F!l3sBz5B0#D}PlR(#3QEd@5uwXl?_N zhHuR+fTQ;No2$AUXW+Ce`uh!nYl9~7|N5l3kq z%D&>qC$II!v@c~+`!y0N=}v-~_W<&J4PaLK3WX~9g$zF%LJ~@QG%7kgiQp$zxbGmS z{nrzDse+4xF^s=2vR5Sj;ntn$(|Mrv?i>pcGd9nUFW`7EIUD*pYarQ zybEC47Qi}sPuKmgaI`;P@vl$#>pF2M!qI)ZuoLE?8UEvJgM0bQfE&6** zJrC-K^JX5T_T<(fY?AQY<7ZvWK(>uLur+_(pcl}K5I$M_`^%;{=?LB~VCZ*%p|HPy zR4^@3c^%-Dk7U3N>=d0N=uP&+fqevfkxB{2tsd@#oZ-;aJ<`xLgc=CSUy$U*F$1oR zLgu^6uR@EVcI=13jTvjiP_F62;N7R+m-Fg~ zx$VD}#9z-Qn3AK+9+{28-N+MY2&{XndGIaj6@sRGQvygh%|m*ZTV_G<3`s0e7{jy4 zeOw5lMguTAGtR4>$TE9MwJy-t+usS?hZ7b87dYdX+BFVfLQ5yWDFa-6ra{yfuVGPe z{}WZM)czraN8+P_Y;4O?DMbb749tehZ{CssGj57owjF7aU`i}u8xIM<`s@PuI}Rh` zISL2fr<A$}PnT{sSOG&TIRlNuYC{hLh{+i6|chJ~UNQ9}NBA5m4^=GQl8hnzHCU3hG&RANqEvbX4bvOs@-&BnOAT>3M9{q@M?ciHDJ243oVNHxlMriiAh048JQP4SVHKAY`Pc@Lmu|Q8UHeGcy%#yi~%vY zMB=^ITqUwh9b@OO^3C{c%}Jmq=GuNyqVN#6S^-!$wR*upW%VeIEPM%8b^!?b_mfC0 zx{g+b%mXe`C+g-mgdDQBfG&dKiRldV7O7>}>m3uZmPmf*>lRW4kbnsiW{x{>m=?Ut zD5@WD?lM9WZnpc95aafS^>^$f1#R>g^BlK98t5A`y>yZNy;~22`u}g-NqLGSy*)tD zWo{kz&^S11XrrT3KiNDbux&7{FO3PJ?UG9L$a=P#n}E zE(d{7>bzxE85bwyApycd9qur|uZKhPWdsrW>T9d}T8nPOezssPFb}?L_Hc~2B8g2I zn8*}h_TWXrS@R(L;EX2zt)ZPe1GZW+aPZ6tP3AxZEkvpYpMkTnw&U0g!y_SmAli#% z6;|{g{$b?O^Q9gZ<9y!Y9)wJ6-`Foeiu!%=3DXT>%6#@HPytLOz~ZvF8K(FP0g;h2 zj}Toq0e~X|#zkF9fvjw9&>|03I%zTxc8p-gbT)oN!(|iW*ZF%jzEt^#O$zHWUUTgV zhyBu1v+z5plWJVAN`e1?=+b7{#q1YRNB#hg;!s6JAjJUqeF;PhmzTXsmNnO}bzn(y zXC$lsX^tFRanGn_W`!w;)`n~tf1q)eewfCY0VpR9W;}2B>^QHZ?h+Nde7_e+xQXB5 z+*f5dxeO=s2q@Q%C3>ytqg0|;O+;)9ruw<@zo4G$vVbA=WMdg=hA5>{8)!4SQp+y- znupy-ez1R}>$QngMw__*=y)>^5Y$KGJK2?d{&(x>o3gwnCg9-@p{YIqZd?2lV1;`W 
z+hLwe#{ZT3{=clE4G5T~N{@*|cL6br{VDcMnPVI*Ua;)O3Y-@RiMpc7KP6|Dz6LS6 z@(<&f%Y#oipCVT_w01K!cWd9bbY|R3C_d+sH6It-E}Y z#*wT3iIANS`E}lQQKr{*sx5n9wr~g8UqEnp_ISno_c6{-_C4i7>iv51zn_W_SBYSL zF$sBZ?7O#=y8?U49^?Y9W{QasG1Dr#K@cy3{z3Oc#?uwq3HLcU9Ljw0ZC9t-=x32u zR3T^$i+BGJcv^@8s|n;ZZ_yq?@^j|{xGGz7HH^nNgIGrQQ}p*VJfs9GjR^eQ^HRgYwI}1++hUhRP^bgLD;i) zOb;tB-S-^;tIlt`lLbU#$T?vS(0O-)l)9M4O{Ax~%U8JqfoJllIQG__hxhi9x@$F) zWz~;k!nnKu#W6P|w~I?&D4G#;tsv*q<26s7d;UIoE@ci4WA#JMYS$N~#Ow$y00RRe zqVtaZ8!G{TSLnOk&K5-SO!OU1OO^Jv@s&VvKb)>lM^*wsK%ZH?xZ06}TdXG)V7z%G zN5_|JxF6MnH6!VPoBUsA#}^{M5P98Ge$hn4v>WGIor(g>l>M`hbPHUdKzpPduZGn% zeg+yS;QKa+yc&@KZTXEag!<8S!Q1sGKNLXM_e1nFPT&hn;5vhlW5A9!jc6AgwNevC z;yze26@y@yfQhjvjb3AJ2}l&px2jN6b!LzsxS4zVkHwo7J$=YY61BEUrP$`wQ_3u+ z0Fjg2#pr zqS&@DCh2+h`%Bb{|6{)3-@<4_yPFK!Q@HwBOrLRpLd4 zg?s^xtpdN-=iwv&c=J-gpVSt3icZk8$lfDYl!eLS0chvU?WxkJZa~(lf4?tL&ZB6* zopg`ChbHr8RKQR2R*Uo1P-2-e_4;YnK%+2gNAPw<&3nlG-d%YU1tVC5Li{toptaXk zr}{hDQ@!(#_eTvL!vypnM?g*QB!%HFG~^6{IBy73Nc%N{{LZTGIvlVCApN*6s{9iM zN9G`5p9eu!M_4hA;HyA(n^t*TTE6nmTaGD1fS&4`e^)4-eE)dO_wXtJCn$cMc(-26 zrnjRrlIJJ9p(E1ASDZjq4(M7GK@9Aa17!)@-R)m_?0!wm*ks!qoTHo+P8mwo7!fW) zrmkPj4+uqsx5gYAHM7m4r`qUsRwlpg-jJ%Q(T|mOWkI;zOby#hqP)8Y;QV`A&YMhX?1&DqQS(MBpQd+4l zz$c^KzuG1`-u}^mD2uS+v*iB)Ye{KT9KHj!Q$P4U4ZvcYn1`gtSmQ3){gT=228bH% z{V-2rK_^wm_*11OJm<`i~>uDDBO5VdQP=W+--HH(2IKM=vGA~1}*OJ;$h zLO+D{sf^OyyZI1lzYs_V@;5B9E`G+>j}s+l(Gur)p4JVPIQ`JFGb^2JPM87Jfhcid zsYCJk3a)E6A%on}s=jJqV_rkzUxHw9+zZ_$I82HWgu1^!P;c!2{#|r_m`3wLQH}dR^l_}G+g~Ca;b%@V>YcIm3 z>y6w-0yChjUMu~b&CF2z^Hb7G1M9RtO!P%V5Dqqk#Lz#NI6!d-@H;(ULBHmuD;b78 z1fF%*WdS72hoq}F3vwnl3fhAz#`X}#Y|dZhrNkw_u|uFNGOnp#CgMQjjIqZHLC#?Y z6UnP9s-ISvh;*|_z6*C;%$r1|_8hofy8AzU^T2ot1cFTy?vi^Wb8AD;7@K23EQ&n5 zpR=H6Y74170t~Z?=#-?w%iZnmkcSEO(0(Z9+KPD~?5VTASN{G>O0Yhp4hG<1OWZ(v zSTh+bc`2WRW-Sw)w17_*K{P3VebALny#R+!2CQj%NlhG&Mr`*IIUcWXArulyRMx>l zD%F!$ls*xW?E&uWA{(xh?C6;gGHh6vE$gv{T?a@-k4C(_=OMF1-ins10uXB?kQnkh zYW}$c_|t9!X$<|DgjvrzvZl|!;P9U-Fyt<8hFhJzrONtd_&OYnw$&*;@5lde-~R8m{CCOodw>1kYWerp z`SjO@G11kVhsR~hi(p3 
zWqrtU5+TB9)Qhb1U;qBE4^>@C`CW>{90$uZGgl~hQKixK6C69|AcDbhwB`s-1dhxt)RXAqTo}^N=SvH#`xdzVPSAs4gj`5Nx|aUU9g! zTDa8%Fyr#0f0jTQmjvHt{?*AczYk}0;eIa%AjR^dlgPm;A5y?#5IpH)wJ*cB?J{z8 z1`s#%Cwy~NeLp6u8vu%R6}#H4!v*%0C8I3;Aggq@Z!$m+g`j3b`f5Ly4rM2ZKv}GG z2-b!3f_&Prdljnb8KBF~`Aq zigJy9n7$f-k=bdGLAA^WnDiPLid};g>EucH*D+T?%G6Sc5I{Pl;`7#^$mtD&Z4rjv zk5VM?a~ngL|ZHLh<{vG7Y-Mq&xeSRw>#2C#QW_lk#Bj9uh*ze zI6@5wXokQTPy(t}gZv(gxI!eM40nfX56mRL^hRk2eeOW+$rBWu1rN%PAp3-5OSq<# zoPfcHvmP*++Yhoj{gB04?@X-XbMTqlQE+yWBKcT!-Be8RK;Lv*2S|&=nzr)H4&>+W z`yl-{MCL;ah?H_a5#7O~<&nP-f{jEi0QBeAi+chUT@h4KRpVnYW;QAg_I3bWqbJ-} zes~#Noj*cxo9Uc~1jtjPoq$S#=zAH!EK!1vo|SJE;liht@R@J#o|xN``o{sFFoV$H ztu-q`Ok^{o}qZb?^Kc>w>31bWTWSOABuhRiDR%mUTvx&%4F$$NVqHm(A@!my=fewO# zsWUyWb!>{f)!rij{}nkwd`%eB7%Uny{iBzaUbFiVIc=g1GY6|${INB(2>ogIH)4-D znEcK|vhM<&q15S9zFHNH8j#B8DIRm8l{S6stP>pdh}wR>FaHAo`8P6MPN6#b*Oau= zl(9H@yVc1>WwOZQ@@0jt3xo5J1s&+j2<+jAgX}G!n0!9P$N1g*ysy}+@L3mFSBZt= z$^jaC@_K;ZuX5SJ5CD5$eDJ?))lD0WM{RSXv(Ekff6as1$jp z#XTC9LVz7^Ucad+usE@o@ zJM0O{Gs-@?kQ|?i zmf4jSX|jPpL>~`C$U4O*PoO*TumyYlwRaeny1$}w1! zGwr*PkW6Bp2NH=%*!Fjib1X+FRX5({@Gu~YJOvrKgFjmQ0A9;(Xv2fmQy7j|#U&mf zWQxb7B!x9Q#Yf~uQ69u)1AoBv+aK2of$2aDOgrV^pA`6bL^Q|h4P(e9MVU5Kgby*X zk_mEU8bi8D`8)m{W+8}203(IHmU-8UXvBVuH4mlKo#G+lI{xk>4G%@h3J7SI`BQ_V z)TFV^?Tuz8&$=>=Sk$wE_A0SKAdTs%d)l6W(4um}77DrY-VT}$O?w7WIokff3?=&~ zv>pKI^4A?}7>*~yuw@Iye=CGawEl;{J^sT0Xh@ZtKQWrPPsL0RGjVWN4w7|qX?zEG za{)=-hKW`-CdhQD<)zlkbE6+)Jg-_>$QT!Gpnk|7;N`sUdrubW1I+3X9m90W?oe8B752&jOeFxKJxNiqwXZDP9QtvnXcksdIRa$vl6!q@RGx zE(NnsrI4QiT{0cKaIIhkWzTln^q#rBT7FXu#JmEqu&m0KM(H;FHbkUfO65&ld_a+X z`EV6w^PLPtT+zpWL8Fko^Dt#<9*^RK@Iz`Z=b=vM)PKglUzVlh&^3>P44S1Y1Un0! 
z0vx3+5lzIG_W*d6U*BI&#AL@(UO0oZku7w?%6l7?6in{AXBQ%NmxXCB5NJLonuiK9 z5yXM(hbu+xRo_S&Vp5+$h_p7x`8@-=xrlB=wBgDgAks-A7U57AzEhX1AeZ_dDA<=c z!uroSJw;zTQ{gLEj(5N!D`m)6{Qhu;X7qDR01^O0jl~Ixx(UUDD7dhXb4t&Fkgc%~ z1wqLs!uC*TOtss>J;MD`B;&jc5LzT1&ABzG`AGH28`Z&JFamTo(x+!d&6d^C60KkS+@xVJhAmMbveG%@M#{_o}-Ml zDkPIwz;Y9xh;)=_ZQlT0y8;WeIFD&5=p(CuFYHkUsTb9{fvrr-SZ z`Qm6asys0-zywG6cNN3G;t54^26V9Jp-oBI`SSj3qc`Sw!eOX@>RnwBx|;IRw+~Oh z2dc_2a~@&EAUuw2(P&&Sj>=w--dxSxAAqxIUhsRF+~3hakSFb-s++3Uqyq|2&TMla z{=n_}nB%F;#~nY?1O}9ophk%~*C}|Kltq|l*rY<7DhZsGC+%Hy+50uUz}}_ZRZx{Y zP@F{HsJr|e;@epF=DeX=voI;Ug3NOs9?w`CK~N-Q9$?>z)+ReJHS*G%&@_16S*+RS z;44uIPTD`w&loGR&Xj;%YQ#m*)C`oV~&T6vGadzn|*5BTmf7GNI z4;J!OYJ#p2`Gv!579s+TVT{T#&Yr`Sz^@CAdhE_@0+HeCOHq- z>=}hey~p6;{PM$HeEMw(s$W-#ewLZyj*N|!941)oIG4D106Dn>y>hVAU#D&C6~87x zDjzcG1M=(uji%h;iJ#M+h@lnAoSlRuA4SCB+<>UW0884;qE|8%%iC804d9!@l$Jfl zzm~s4x&})E>$i(?JY=%7-lon@41uZnI>A$N`IL{3d!CUw9R|ZnB1&YDBOds3{}ig@ zjQ9YZD)Ei4q*p*5r=IpGWGAxsv^C?zn=Cw#Hflib8$|4a=bW5xXZEigM#q6O6FKm@ zXe;bEW%9}q37VO;*UZ~sv*8h|dLu<_`2O|Bp=+W0vj@lITjv_SgS=icy8NEP;3=t3EwoS!!NcZqbGD)Z zINKXFG>6&U%d-HH&P;C-bjNZQfi_$OH>Bk=bUDhORT-N_ConO`8Czhn{&8YYF9FN# zH?$QjQiD=NR*sWb(=*dGF_+6~xnvw6u?e`0uBGQ*SQAEOwm0{kxl-ln^Ox|lqlkOz z_Il#8_fx7AVKlHZ51s0)<6Y5ypS4@tAoH4$cA33Zeom9=!<(K!ekq!6G_+ELIAEY! z$gNjQqHqh`YKcVYu9Vx6SRUY1+w(C9y2>O-&V(uo2)cB37OvxT}-I}AqKbe8X81h$jr3c2w`x!98m3s~|o;`s! 
zl2~gbfbR>1sEG^oE{pl(fP*5F7(|2gftv=JQ~_lZC0<)aOJg;p)}4yUIxzHM1QFsC zS%_Z~XqEn{K`6#%$M{)XmKH2r?O z&^&;if@!SK&3XClMHL{AoN7>0%>-p2`0ET2*cA=KKM3O_dT(*Jn~=$KIUe>4*JKic zK2Ew};n&XqTgN_UB}+4=u>kZO+o}wR9`-Ym6(Wd(XJOU_88#wlfr1MdcH7urf&O!& zIv0G4m?I^-o7Vf4rqskutk7+V8SHKh%~n=8RV)frZ;zYiDgTstp=ghDJ^f(kumA(;8bfP}p6R`qPj7OzmBH|_30Ubt^{3o~rSEswXhAo?2Mgoz}47RyP2#^{oT2jVc^ zy+GYHQTiKgq?T&Yl0d%2e-RbU(0(7d2=;iip1=%ZL&}uLGjHZ`L`;ps4t0VSMV65| z6bA!)yXy)oO%oz;HrRuni#-sr@_t3RLU8T^+it)@noz(x%>M8FsB@xh+NJ*Z>jG$k z3F$>MNz#+smW)Sf^y8DV(4?O-qUrwxoT_KsN62gwrJZj_l1k8dHi)`|E04giCv>TE zl6$jjVH3~6cBkzj=3ZH_>ys4v{x=_U?q0f<7bSCFktP$rs@8)&a8D z-zR9Y-ko~Bf90?64R=a+H{W?cAe!4lWEfWOjgbsk4}bMQp!&8J(910ITKd9}2MjIh zH3iW#1fm^?2ATn_s0`GIh2qx*`BnCEe205Km;gM{dDPF_+ge{(K=xdw2~A6rNj@&{ zUkUJWYi9T62Z{}bz5u#+dvEf@Mi!7(e`LVG=vLCWaDOpWb-jCmftFj4L=_`F%T3Bf zqp?!t(=Io&?fW6lbpimn+#*l1V;`?xPNGQ}2Z8gy79y;lFG`vQN>KQT7kfX-SsL!3^)3f>HyKJR_J{cVEBn~ec4^N+7e7F z`Gp`}rBbI84q9i4H^~#;%Fi6OA9QN8U>>hPd{aS?FYX?O6QrgX=vL))iZo%f5m6($mIx;7-$t;qjpLtnH4zN#&fkR1@6w4fsOP-QB6+z!e z&&zjEZCF=!sYZjyhDcwgt$f*ojo2^weWqaj67G}$LodJ6cGqCHcOw}SNfsHOtUhuHb2nke@{c%eg#C~rZb9uoY-$Bx(os(`Nhsny{RXX0HK-s zP5QE!4rH2C`itSv;1z}GBr$^5Ks%IvkpZ!mgEgN&H2$IEX-ld0 zoLi77#~eYM9BL#tTC>hwGLt2e|GMi1*3RFD%^~? 
ziXv*XF71TUyVzUMwj6 zMYXI&+NJe+4EC~ZxeP)w>pY}&r#l*4_ypJ2L0=|&{cMbFE6gSkZc)>_Y<{$VDt}qYFP=Hu0f6(|y|G_#HV91J_o5&a!C`ks@)AAn z&uL@^m5T3j1syP@ppmUNzThwZS#zvJw3u@JMvNxou|7s12$n>aPPj4G?46^0zlMKyTMgN5gPX#VZ#2#{BwicT2+<-GaU;(^85-=8t zuW);Pz0sKBP%{Ww*SRK^AbHjn+!^D7&OKFL@F)ftLy-{C`W%PdF()bmX;RLt!0C3D zgp!e_Js0WR)gn*_Mia21aJY(p1@=R?Jft#87(h-Rq+icH!OwG`639f{aJWAl=!0en zkwHCM?L6Y+xd?&Q*9T*bw#uz-fT7h8sk8g)4u+#b6u;iVoZsklfhTk{r3`9Ihhc33PPgSJ{;SoN)sORBe_KNTx>PkY?n1 zxOVDWewXsji-j03j2u&kDOIhAOR0x$LU5-mA)Tflblp4##98JJe%0OQNi)EZ6JZu= z^N78L?1oTJe*ZXfkN6G4qI+yQ{lIzGlrxHxPkS2b5gH{%>#=WsR&ehGa8V$ z0LHqxGXlYhrNCx9t0gm+sI+*oa-@q|S2Zp7|V?V>7s}9Ez!BQBx5;(cAS!`TjhR zb%~)k=D`e%=ShvW7BDNuUTpS`Fo5T zqK#(ZWX?^`W3!O4>dHvSCEdW){x~~vY8$3CC7M?RM?O?PsR}!xJkHQ-E)w=Nl{4sa z6F&|099R35b_r#*G2o678+7uNB$1RdiVls}Sk@PZe>hTljd9%S?e04&@otWjJF0ae zEth>{F~-eyNr$#tU7rT4*3cAEEo^|aiS@3R1;g^9KwWPGRtfvUNPPieqjlcYo5Uji zuKr;$OI4+G#6OAe&7A*3U1_Ks`oTS##Td_ih-CtO4kjELqBfngJs$(atjoL3nAgV) zmyY@^#O6hp+{kF3`PKgcilshB-W6%DI$2AL^9L0ylYK%;UIMOCzUl=X63U!Fw7C*- zJ5T+>anHK%U(}erytlh^%4`p~U9&zG5lT1OVol=ify8dANoi;lxwL>h#p-teCu@-F z$nl?R!oW>DkF^l?Q~;8(23e#*ZLdHeIM%mPu}HSY%MpnNz1GtrRnj0*fEU(Tv?lW# z4~Isb<>F3Et_I-}qCImGT_=d@5b+CVfjEz6_Yv$WTmHq+7U;7 zig>Y=b8DWQCDhFPg$nETf#~V&j<1Y>nM9)8hr(`!(;++J&dM@b)} zd9bEz?dN*f%1c5KO7jSJ?AWKNvXuniLZZn!51NwLEBHIafqoy%s>AL8zd!fl&swiZ zp#?7ad;VJN$M5OwwdSEArFoP=tsPw4qM`JJ{de7-MEB<-m{1tUO9&>CXyt9q+5lxE zeU9KDFN&jvmk-+u9`oLSa4m4DM<~8{f${9s_S8Tb^+F*;Yam{&(vE(1oYI)H@E&y) z_oaJ&ZxnZ;`Ly#ql1`D8c_5c-iG;h5mgNi`3rb#js#UruM1ER4MM z1cuaa1tz-byxK zJpT%}M{bjC6s-XW}Xs>``_f$7m zXtZ6)ujL@erJ3q^Qj*iiAo`jO4`96ETNIyQOtd>R$4P!#<A92cXrFNaO>ScN6d_E^WD=S2D$t8)je!A8%lDc} zCejKD0}CX~SmRQuClzJ8Xk7PVPff0U3*G0HPw(K%*}jC1tjjiZp4EW<^WE6m!HpUy z;hX1Rx_Y^PX0gJ{?(55tX);+4PSv_mNE*dTllM}s_y7Wm$Z90OG(xIvBQMzd1W2PG zoR)d3Ed}NC>E68x<7(bY2Pv7w1zZwqMN^kRIeeugYnz4I;AruI!r@1immz1Z-B=n6 zxQtUN_JMlhI&1&6)H^$n#9gG%J{Y$H*vP>P3K;z}*$ z0@n1C$;gWsBgHC8_40jW65*A*K(WM?JLgBT`z$&_Z;wFO>1NT6Vbf?ozzL7 
zSKgCvD(u$)yM8+q4&73#^8+$S^vihWXhc(o96-6&FDPb&)b_TZ`qvElu2);<#(J7{ zipa!+CSe=Oaf&>8w#@=G3i*zCh-`;`-~7w1$`367rRgf;xY5$}NyHtS3^9?JjCb^p z!c}^W_A`yZRAK2*_lo3vW6A?Lho)2=-=}tQ;_*ux}-^ADliA9r;d^vkT63;{~x7 znEF7qq0lH&eadZ7%*#_u^&)(v0S1l{Kdzj_cZm-uT^Vus@zJMj;$l%VpSoF)JP{Y zFMj2vOsYl9#94bolSD0>pIaeXY0X!i&AqBVN=oDiG~30c`m@Z`8)RcOeIE6$#2#@6 zb;4NvNvQO)DeI7i;U!a}h`D&a@BG@VAslpPKFUM{-i1NW^P>146((zbnHogl*L?(n zTtiA@R>hX%kRC%Rhe4Fl$)2=2{tH(xisbRg*mXubuR?v6fBafuH3oTd&Ic)oGd0(W zoFlPw;t@G4lCTQJCYF;<)qDKror>JW{zCe}T+BM3!X?t4E3GH%yq}zS!E9cq_$8eK z<2vpMg#sZ~ZQFl?3coJ`D}}jedc9kdlli1WK$uj{^TO0g%U;3GG@VLJ6U;lk-&x^k z(USYw;n@^ka~enN-SHZ?7DzeTP@d%mlU0~(Xz2yPu_nob|BvSEf-sD3ky}9!# z2%Q%bpEDdj(qB`NXbB3~JngR?4z22JS7=hranjKxE(pmV^{l0&EVoY~; zo9XVJn2v2sH`CKo!!S&Z;W)Z&rl&dj9GxFM2S>M~`}g+!{R18zoO9pz^@`^;;5eii znyTdcgmOIu^n3zfZ>lJYR2uH*eW)fTj7m=sSTK=bw|L1PELF#!^gP8+jKqI-+!^*& zvqZ}~(KOs$EPH9g3{W5bxU$=n)X8n68ieyW2XUOGdj(&kWG6iM+8adyw`U8z%;#SF z{%2>09_%*-sE-)3nSgrn@1Y8)DcJ3@JB!@+11C(Fwx+#CmVQRx8`Z^vpoC! z92hR~MSFAsS?S<0T;3!>+FF#rz8XdjC?g{6(G7|7!j4cN7Uu@F+~l&ZbacR3VE-H( z|1brV_>8h`d4S@1Bwd8;9S~m-E2P3s_S^on4XZza(Rth3yY^ppR^ZwS_b2o~hg@`; zHxVjF4kb_Ms`=n&%M@}#Z<&+adxcgZssvAm%Yg*hG$b4n9XNW=5)zKPDxLTQk^wvw zaAj)DT}f!fWftzn)4r+^T9j?}|KSuZffs^%&dm;VB{*)@Y%ZEgVdwGWG74yyMBq!p zb}xk@?XcktAwg%fm2n6Ij;$W4p5_&jqC_T=r)<>8H$cA2p4a?;2xLIzcDA&tH4Y~< z@XaIMt>;5fYj6GfdodshlRKOnGqwiEO^sA4$}c%?6u#*{1Bn1bQS_xmgYxq~@%{sR z--j(p@#n<{9>`o_z`k`WZyo>XI6<+k#I@li&Hlr(Pn5TET(sdo24b+aX`Bu&nQqPe zSvv&9aFW4KDdJ?n2-QjSSBR)RN1I$p;D(4X*duIeX;`b3_nv+?d>s{fF89P@a(eyyWAz5n_F$u z{^E&~VSQ_bnJn{)3T9Y~c8jc!hb;lOq!bcZK&lKpFxsCqF>Vd~&pu6fub0_tYo5_; z0hjvUQ~X)2w6>>mNki3BkW-M#8^90?#$JEowiN`-iqr4~WVgtV?Cx20(SGImc&dDz zcQO_nc0kvRmI~xg`ZzN%q^Y@tCa4wH15#4_5l0r1eV1PUQF++y7I5dfnIyMY{c zp%~~}3SGJ5*@heEg213Zj1Wr#7U#?$0&LZX!BJ2 zZ}}Y&z{P|xQgJ+s@TN*24%10?P@;o{20@lC?>d{4+DT+$hQHcsZW!nJB?;^SBGo9T z5XHIWX~FyC7@T=TF!kN*5;I^`ZZa@2^G(*;X-uh?G0?G*{NuwtXPSi(19B-}E9TYw zgxvV69_3(&o zb)V?*1^oe&tL87!FGt6KEmMbt(e2YLklAAHA;Ha{y;1Tusny|;IoTkBW!_cbj1LXb 
zl@w^ZwRI_$rgE)mzoHM$9$Yg0B!7VmL{cyI{Z#`m_NiQWDBB*N>TcxhAd~HY`qg24RR-*q*OK?D-)d1M zJ7(jJ{&zhtg0G7iLEtC8(jv!FF33?qb|R#xP7Wa?HifAGj!Fy#l1{@04tI*8Yu>V% z0P6RHoI=4p36Cj-C}zmH;doB;hP9r(+h&$y`bi;w%2zpHFB5y6>(g+Np+g;tCW+QS zCuDYWFEp_9{SLYbjN94Pw`?XEcufCrUq~kc>z^4L2wHG_(LWJFbelSk!~gVLGrNc3 zj_ju#(sglkOM$UqZ*AX-AHJFh$5)2Pvv>S@a+@0%g4Lb?1b?7W#qR76nKg0Zm_d?f zrPt@c=weN8CP=oOV`cL7as_xg6Ewt50@M->Eb*v&=#V0stow4CrE?eK9A4OuGWyip zZVWLNLZ?)s?V{$yqh4)H6}6TXduq|$P0INUu4pgiJWEEV`bjF!0nX7Cd(tn}b>X8$ z-lP6mE8fcn^rg#BIPs~5Ye{VCrQvo&@LGUnr~RV8D%JUsVM#yp)(ZT8k4VBcQwASm z(xlu`0w!#Ec4lCx zE|CEC3Z(tTV5aBVQ`6h(NO{wosG>{M`|^rynY}j$M@kP%m=VUt?hKw^&>K zP)oNoT(=ES6DqWA#gF*Mc<3MLqolAob3b}~A!HWIJ;8ZNC2z)WafUz z9b=ISH-YvP9|+sls(9wReKFLT7Jqb!L`sPOJqwyZhn))6?TkI#*&T{esi_1WZ{a(* zH%64yJPD5rRP{}s?yo%B-6Eke9e9q*3Ekk1H_p0P_Plu_Ygy74T6w6VY@x~5BMCdK z>6e_+8bRu~7lpEiFSAq83CP$?3Bu%0A}-e`f~L1iw^hqK;RytbM^BZR#lXJvRUND9 z72pt{X>H`bl6^s(h#4H$`PK#9LUckgFoN$8@TqVwH1Xl&V|*#v-Z^|nIGK;X2qR^%IHgmqf6q{O2;E|V$3T3#y@HZdGgZE$xFy>N{#x0Qj)=jUUoTZzl6(w#ub#D4E2emt>HhK)Z~kQ?@Sh9g{-`%DMJpH)(fJV+bJ zO_%LNgT5rFgbOVEr+i-H!KWr4mGFHzeK?m**$MNSM4LNYwu`a1fF05`f_@j3;!r(~ zagr^>PiumnE!UR)Y{~F`{p!4@Yor%LfRNWz_1ALs^MB3fu*=X2v zi@>?ZPWPMKAi_`5Fuz>oAGPRMrwLd020Yt@xL*{bt@vFF1Y^k8*7dL7e^HpU3YEk8 zL}i8vp?IkDzmET|mA=0Zc%a|@#@VY$adzm$=Ie*s2G@v0n9 zHFm*{1_&aHh9pll4rjnMh@&nMja0IV5p_(P%#|z$+a&4dzN7Ofs6OI``9=szMfro36K4aS!EocOvaZf>=dY^#9_1E=I-EUeB`DA#>Ix|dp)$F zheT|$z;q)E!UAk?d#+|K(c5X4O!UWL5OGA{EfA8yC{F3QehMJ-ejNTEQ44S_c;{+2 zGYbY|+Uusr>al2__MeEhI}~^&a%nyRAL^VK?xecmK!mRqKrx9HI?1lBN@x5Cm-L61 zrJSy2TV*x?9+92&2&Ru2P=4}l*Bt01CAZ3`Tm??`#+P))^nl|6o?-gcxo~eKM_%#^ zMljtaO!XjZkjp7CZa?L#V3=I**9`oOi%@oDG9$2J#!;NR&__aa6J_IQU(x_Vk+uEM z612}i!r;JTWNz~gQ0J>=BGFSolv8}+dIPYyAa|OJyU0cL2l-^m} z@Nc|(ve$_NPR_FdndU<7M#8}B*6EE5phDF;HD`RLO4^vCn&Z>_Nv#8fJxu|Dw&D;_ zbugT9Ft?6@jO>E@?p7uc_xM$0kS1uyBT8(L3g~2eC1`s2R^?XU}Ky87N=s zW!_XCN#P#q;y8Tbwr8W;gCQff0thT!$c^m+5!^a>`HGRWh6~y{O$#B^y2bu+zl=gg z&g5#Zr=b_WOH})>hdbz3O_{{+dLkG+!kU1wW8QuD 
zA8_Yug`kdwm5<}BMI*UC)Wc)|+4ur%a}GpYLMSLq`DO=GOMt&2WB^6J;b~Z3M}PQ8 zun3Y(4FvEk5On9+zFXENhi#p0=PRoK_iH&K*epO~?+VH%;n2pNsl>TGDZzH=XS1&RWq zcsx_ALig>Et%!m1Dx zAp{3f;x@i$mmz?3QX*F%#U2qD2pA)ar*W76KIN?LJT z&bzzCVhM(aUhiXfyj{-s=hVn0_a!~r2ZjRMhquewroFcb1QP2>8SpFM7uQ-K_dM9X zFeNyW4Cb9HjX8{jx%$5c9&@Kgtv#2Kc*DZC)_6}Xht{VJ$F2lOZ-5G=exAS&<6A(5 zYui8r;mSa+Vx;8qe9dI6$f7<^69@|4G|F5F9?OuD0pq+7oJf-M^IM~asQmkDz>?-B zw_NEAbJ`F;Ah;2`%oU9mO$+>P-d%qUBnS%5o%Ew3AGVe6A2q9V7(}L)qkY_VZBCBc zK$4Gu8}w#pYDMxm!jt}~1A7aAJtBDay4!&cL(~rJiMz3brI`Bq>fQB&%o7Sw;yMB^ zdJG;~x_!GXQ!fJ3v1WIK#0`D^_Py>}#1Lc5`bU2~to+<5X$DD^n zC-CsL44Jj{RI68q`R|L~;R3O~oPkQf3o?>J8+uZQ@Yg#UMZSqJm2g99SYP9L%5Im- zgGcV;A&oHIqcO!9dLU7GvAEX^k_RW6GSWvkP*Z8`6W3V5^p?@cUEF7$8iE%}>=TU< zTABER)=*D87|n9v-7O4%`Bg$F0Dx)*5(~xw-rt_gP1_kjlXj)Mr-QM1``qySZ<)$e zw9S=&fA2R|w0YR6s&Bv(&_ci}H{_r2!HOgP_9{0`Nlqb_!+j`H_7&JATb54~{4#CvB z6qKH$beDWLm5|~V0^6e(vwdo>a)`#HvffID#BedE#jtR({-Lk^ZHr9sNX+QxBx%~p z3jK9@jB}(m-9&JqsconbbI|imGB-H=sLTQqf>oDc3a~RS%vFC*&b8dxKQg_Fgf{{w zBbAN~(~O*6Phaih^%nFJ%spgn6~i@JgW!)%67BAR$*Vwx?q`TpX{`#MgF?OsBU2P| zFTU*bQXVk8t#p;7R+$m11%z;&5^EK!Npf4%17a5T0nzSm(&3YFQRKlCIr5*t$CUdY z3IPkjDpZ0pHq$HBdz+cbW6wuAI|T;RUXzomJR0yBbpK$B#WjRUmGEFYJm(x79+7sa zWaHL=QIm>Xg-`A&2JV$E#!>T$)@BfuhCIez4=)XKYO83a48-V-KA5LWq*fGz?2lHL zPwpfb(yFDuQ-2hV#6O}eY^FMh8AecX1QPgf0i{A$kkXx0B5%LrjCYGBvpBx7^g#b> z`}eh#o`2dfm*i@{c=8E`wKVZPWN(3Equt~E1|X+FrB4lyml1a%JKiP!;=F|uAZl=+ z=DYaN6x&WR=EZT|NYx+~f|U3VTV{5KYB-@jxrQ3_IF9Q-OI#*#9=l`^k3AMUU(B(~+Zx}1E@ zL|@4U2DB88yVNP#lwo2}=PA-ysrg9r-w4pMXvQ(780cPunADPxy_GT|77{}fB$UGq zp|YTu-=0)3*{yp~!!M<5#E?INdmB7X5G1;M7fx$Q?XtTnvF0#5t&)IwAMJqH>%QE( zcN$osx0+2(9wFe!d3;PfB$?no1Gu;)ek4TKnT1dJ{NG69?Q~9wiUPiWni0s8D~wyqJvl! 
zpWEH`@6gV9WqecpX9BG}U2v`rW@V9Zs>YgvVjOF>0aw`NXeE=Zgy%*Es#85M@4Yn8 zb;Rs8{?9)rGP|9btj*8-(sFe^Ss~rw7RYvi$6^9KfMHx2Fq=|JX^pFY*bOVXpBW%e zA67IDfLEYPC?v%FrXTA~t-is*GgIKuNNAoot7`SA+oM6mNp@u-hs!<7KC za-^LmLO*^IBsJ(#xE*NQi$6zx_j#`^|=P|FRzPbSLF>!+{kkLMNx>X_|mA zazUcAHZ+lt59g?h-g(5nR2atqK$CyFX4-lk-0T%f71Q5L>)`eNSTQ-@h)GsYvjg4K za7@V5i0CUoc#zOp4F<1TD(qvMs>I=u&SA`uG%dkr(RG-on*O2>ZO1bH48p$7B_MFK z1HcRZ`w2#H|R+smXyOev5z@JyJL~{sguyxH{aD`s)f?VbOX07j{-vvZ^Y5p}`8PR${QWcZM82_b z1x2L<0TTRCB+it;lsH?2GXbU;VtRCp6kq(ezDNS6z)2K2PB)n<`IJXMAy=EFBgZGx zsF#3(BDW!e|Nf|ct1acFyzEL?pVZQv7%+BmYOgw<;UxG~rn4<$jfFFVHFvtzDc`B+ zVsB?*ALV}RO73nXdWSy6(`K3a+gNr=IFyNwH}vwb1bc{* zxHuPIjhGYJs*^7zy&x6E^@Z>$d=7Fbu>Ddy(72Jy@FrAG9Lflk3BIUzJkQc9#}Gaa ziIj@@5%y`QahEQ=oP5DHD?Etd1m(j?b@mr#Nm7rfC``1~G6sNkFhHE3;eb9HHN~y> zg)52}?Bz3w%N%&YW+lv+M|&q4$|Zfn)aey_c~yd2 z`H9CVJZsxl0yhOw4kUO|VDmnIDRe(R{0lrbxEzy4Fy$-y0H>OV&}DR2MhL^9&M$@g zZF=)`>s1N%S`a4%zMK83G4XZ`5B<^6N;Z2n?>P zFBS@Opr)5j3@v9w^AidSB)Skd;aQPW6BQ?d+xd@+Tt2hVX7(SoV)$y&1oyU{nuv!- zG!O=-Tl(JBbo9{wi*h&!7TPWh8rm+4S)p5(IROTMS%4xrFA}9Ni#$M^)wu0#vGt$> z(64F14)xAJfr~}6AbEN`Y2e1gHz_c zpECgkfyR^LNB@5ek9fXbl*@7qy~B<2i&T&tU;_jj+Jaw|P&MJYzTsHBUr`Z;m;r^X zg#|^(iDq~Nmi_sKXo$;AUk{^3qU3IwQTr7JU~7`PjL>hBu&SRx^C2uLMW)Z68$Y!H z`xBz!)O6prC26v*MXYV4^Kip&b`t$I?RjU$W+|zOEB?H z6mb7nXnbNoL^OXhd4btzg{Zb*ZsPUseXR52^QimJC;t)C!k6z=i!OHBG%wcIH!Wpk z%ANv4)i|s_K`uiTDJVZTRVKbpr)n*{l)$oGc)iVBLgvEN$J-&;;?iBlDfxHDyNtxx zSDRx@;&DqZ_q+mL2N`p(+8fRUiJS&4R{=HCTaanD^tS1hk?G67{(WFbd6|!yjJ^o5&xQg^k!#>(AV*lDwJ!B0+O2;M>C*N&Z6W5cbYKId0pd8@Ix`&L&T&+a3 z(nns-L1ldfUCUCd`d6zSZ>r*;FExcm*Uo$BFRHC~gG&D9lINs2p%_YQhwa?vo)vMB z9d!w(@RhXZzT+PZb$SXv?n`&CP9%nSpIj-F%!J-9!p)_*zPM`PfYy#O$TRv7ktOkQ zNDE)EIYXb#w(M@9>_=pu#a&;DQ(@VhD*b9F0?prh#rbS}k{D8`E z?-4nf1<9Skp9N(| zOFURXI5)T`w_uvWHt5g%~oGxw2 z`fVi>XR)+(02#B`Sy$QPFapv<##OP0Q@Ka46Zfe%<|i+rotWf6?_A>0OC>LU6j1pe@+Rk8OKR{JmgSKwKB>_|#~eX>RYY4R^} z$d-FNmCMzn@#t%wSqD@ZF8b^4zmrj?IiEZhuDFo*q3!`^T(eX4`W*%h$2UK#g6xtL 
zbzbehHtxQx%?`b741IOMW@dsFib&?VBd1p&9ZJ5-v@((Yt9t&v`Nu+N@2((6{%k9wG2%g*GPf)1l3RKQ9vE zAbSexvx$QFrAYsQUv^v`NN1+Cr~>O`5n}8-&i8pDE>`t^nhB3{Nc^!3JpG>yaRLnv1T0Q$y zNy89)04%@U3{#Do$(KF3g35IS{k@-aN7C#Lo=jxNsoac*Xxec&jR>syyj|#z^j>cY znTrlKIx$9m-tE=8>iC6pNxssmhdV*W?OjbA>&8ori@fPe8l<++Tr+rgMgut?NoEWd zUr%0nw3j;5rAj3R4BHI8kEMS&j@AJb(Dxk+ZhpsQBa-!E+ovr+M<;HU;jW9JZHj?H zFFFr`8?^cFD8$c3sswvu7JfGZ*91h5gtc?kpk7r{pCdafNcp8f+A512l7*uxV8l6A zwRtArOun7_KQTvlZzpHn=rymqcQ-$;{}63(9lX8yOtLV2Hnwr)w-B~+h2)%`lLH}c z@*wwbt3JP-(7AHTy>QyFXsBt=eZ0slkn*veJO@l`gx`5NK}^eyvjuQ) zW*@D6D75$~JH~Yu*uHPa_i6#5wneZr`+RHSGHC3Q)b;E=f7U* z?t{JZALdVtf5;=a$wTsi6P-7(bo>=P-8izq^*Z9dXM0nR)i6np9UTb%&ghAaX+%qa z;%&ThdI!O>#zSut+U$A79Qo{}P|nHs0f4kI8L-+~)wCA7MtvCT%oC4tVzPWzB6dJM z%iM0Td?vtfD)5>idxl~|G(fl8$jIOkYW{k;$N!_E123KOgEZFUvSKZ{>n{}EAotBcafWM-%E1u^X0PH55C3yC5=o@ zTyi?qbtvW?nOn)w^(FG_d{ki2*uBh)&s{L8Cj-zU`_}CQYuMk;QFpqU1cQZLWlx3V zyh>@UxJ7gmti8avCw^*?S9re8~0~FA*8bjbQAmGYhP2qLkqZJ`m@|V(sNrnRs`N%9 z5Q#+Vb54K)p1(+tm5D;DCh0d}UVR0P!eiBtbyaq`+d4TDTjAWB z^4t%$Gs2F%0GGW$K#_$tUvElej?5*l9}ABu{MU9yezNix{4i-CJfE2bGzvCaIbQd$ z`|_(8Rsbd7g@S65m-3Z$j^9L)(Ky@P!LH_F6lf zIXcs{?+mfjUUX_G;J~lq;Z3jpv@rj9L8#er+X@LpzJefMy)OPTh_dsBEwMXZ5j5Iq zxb4v~UIQ2L2p{#K?iS{ict^{?mF!(h^tC@vRng%Z_{zzVN3trk8X9d^AZYV@a^XV~ ziq)3bodtqJ68h1n=jT=rR4)L(>AlJIxlt=6hV0OvK&?O-|FKqKL4gKdJ zq@9yr!!u7)&sIM$l`%b!HJC3!`-v;&o(3n0^RF_PJrjfWeR4Yf$by743d_ZH%S9n||4^&5F7P7GA& zh(Ii)y>8^*&o(A}TjbHYj1~lZ7a{#}GYmKLN7|ilWpIetwQ0J_%(xons1RzafiaQ> zazfLmCD+{8asEdtyUqg1W+7-0(*10$5x6yv2EmloY>+Lfd0D3sJf8JHtPRm?kMu6u z1dc5y>bVlHWCi_>bQG1rYyX7yJ+q^Qm(Kc7_Rvz4k=tw9(75c>^-FYx($ivWJ5p#4 zji0>aR1zCuO4x_NHX z92<1En4@neHK~Op!yWxRdN4M$Ne0`!Sr|bW=5-d{lM^Z|#}S`}4y1Nr-gEtMe%6jl_s0u&KP2&kPHXsR=dbz^ z3S^5B=7;H!(TKJjWca5DQRhX)Qc1M@IOP_v2rVu@b)~_a${p=%utzX=3q6;Uujvsw z3oC9$kpjzsN(dbq57lt~gLyQh6g+&*9i032fa#{cW7D~#BuL~X2B}S_DaFfY3BK%h z;v482=e2~win%1)r^Ocg$LzJ*pR#Z^hMK@qRS)ARLItQ6&-JUg;++}vKhYyn=Df?SGn}=ja;+!K+ z6Kilr=PPkYAW4ZyVt%h5>8}NLhOgCi3E(&lJ%9V$*Y8*okBGk&o)p|^1P89mE%6JM 
ztkl-5v?K;AnWm(QXmJiTE3Vgw2iz41YU*NfW21xkfgRAe<4f8hnE_J$=j8vdL6nO1e% zQuPXn86N7Ouwsv^zad%tZ}z!ZPf?4UfQhI6pyYJ3KHQmPA-m?VPx)nVIJkUBFT^tf zANY`9PE(b)mVos;Yj$eohE&Y6+&&h|4pK_m9DLV^s3Ga+vX6-`tju=P-n{zz{FZ$h zgvh`{RJbBaXpcne#Srmo^(+Wh=C_n`v37*e){mxHH^D7b8CTp3+Ge_LM0}}9=B_Eb zCp0g_4CgnP>!DB0%BC1U4VJYKQvh3*eea1E*_3*qh zNn!qsKTN*8Lcc$lV2Zt9FRn{U=x@>16?B&D(U0b)>nB}%>-a^Vi5aaVAO5u)0n|em z&fCq7gw5Eo8L4&hP}M(_#e#&-R=MB~*Z)TQ%a0tjfPndz>$l z*>Amml{O!#XeUB`fd&2U#q$u43I-XzSWECTS2awsVS-7Rz9m{|BreOXlslAV?yM|F z%V8X(^w8>&B|BmH&n3yR8NLa9=MbIOW&B~}OcRP zFJgmaA6W;YuXv99fHe55@t{KZM1;3VOSxn4EU0zSL#HfR;>GQ?Hst9Vx+HKVC0~>j z23e8^qApXJT3d*_WjUaSmY(K%vE%WISTU62=pK5S>-h@Bp+;EII*%G)L|LOHBi`^! zLw{1$z@&`*<#G{M$C5rF$8@F?K6U3_cqb-Pc`qSh_~xS&yWABd(<&ZR8x*&x4I>U% z7l-<8pT5u8|GFg1>&g>f)ZCK3)p;f}%ech|#|lBiTjA{@M-MJ(VSvVe`UwUXz0Jpe z4N{~!Z`+coGJ>VA;Yy;Di2NHTpC~55)M~1DM%1;OIRLnQWVRPLh{_ zYkxIlJLCVZhYhExKBR}Y45UvnF6&-(v>htmcxyQ)=y8LsTN1G#LJeE*xM41MxH6t$d%l|vm0Xt2%k>D^kgJmJjASN-R@^^V?X|r<4oL@_bj-6N}SP-;cQCXi(iD;dm&(2{#*fOj?K% z7|X!)(QU}?e?Q=y>DItB#;jBF&_Ckbo4}8usELP|?Zp|l=SYhM8}ui97V*?7zgG^< z^*o)%B!d3B%}e}pp|sn$*_N+8X7tKa+Xu%6xx@)(??7FoUi0?~Ucc9N4OW_76<5^< z^*SIs{CCf?Qg%KUYAv0Y(-j2 zU1fXMyP?dXSHotn&WF0`LX>R&&nHI!CQVITiTdXC{<1kZ^An#Wl6pnQrshmmZq-7gU8n$WXzb5s4{}t-rnLA8!eC4bqREL>_#~G3m<+i;mkCNUP zgoFdbe>@xV*wM3!I^DR4ToW5ut7YE0%t3f}icVHrV2N}nolDJXEKyNIapq?Q15dn_ zVOoi48=7)&Vvhh|*B#ll^T#~)I~uY_@g`N4BK#u`-Be>eN9D~$jj|>og9#0bn@OZa z!|AfHjRys~=a2WrR9~90qO}DvShwpP`u|OPFz5Y*C-YZ#`GDL;VH4__{B!{yyZ@#Q z_I-1f_Gt*Sv_Dj<1S_OQiR{$Hf(gaxQ`vT!JQDh7K5x6IRe|EeyY4z(Ps)i`N{CUbKxEMv-XEpevwD`!QZZgo z9{&oYCCg^V?F{pZa{lQh09YL-euEmLh2C!C&7&QDuKXo8lv~nh@6g3dy>e*4Y7TDG zR`3BSeJJi?sEx!c6FTbACH^EyqbuIZmrg%lurpJp3&zq+O)5se{gooHt>hsmre44L zArBw3k_6cdEU2o0mELft3IuFvEFAtiY4tx7xt6$oJ&5_KKA-X{iz}9Ro|n6tKv|R| z`Za0S&fA^&`lK23qB3yCVZ9Oz>Y#P$gL9a4ktuyutMMEY@19dvuy~=A^h{V6P z^vSOw5x-6)c(WGK-w^_#9H9ib?wCEl1i%$iqRBi>l(oK5?4M+(B&j4riuDUNoya6~ z8Llo_uc1w*bx66fS_wuNd)|SB**9j-_B@3+?|}hnfFI^GvE`wV_9)IyR*1DRzWnl< 
zj#g^d&xsfXDYlEX?mjKEOTKG`j?sUEYWF$o`Tc+=g^Ep3u1=2tZp{B3Q z+_ASm|4TU%+4xUu@+&pg$vd+>gYN`TFvaNr!{+NHL+sDR?M4(+$|1ukmbq8G9OQ_g z-g{k9fp<^`^8ZRqgaE!80*A06QbT>V0xUT~(XmcrVm5hg*I8!oY0I9CZOwFSI z5YZc%DyCG#b6#iU>-RJLy7}${OM6Q{$t1?NqheaKY30wo9>p9v$vM50z2L73U^(q9 z)gUT`+XX3HH*>E52YXq5y-3vW^M7+5&Pv62he_3DzPnx+shw^vr(1W?PipCvA;XD`?&5x(c5N&3 zo2eAyC4vpEkc8!%VT@<|6O}P2~yW_eQAwg7VFa$Got7lSVeNpnXeKK06 ztGK(q@f%q{j7HjzDo8>VbeFhM=FnG!$}rvZ^Sdae;J=glr%i~%mlj8#CoqKdxh7ug z#BY6-S4R$(Aapnscgr^0Dn7H4@AR%X{F2jkJ26361|@jsX`g3pNtgO(@2a~h5C$cH zJ^YI}Arj_^KAbpL*=leDF*Ffzipgfp1^%nQ!%%AtALI?gELub#`c)}D|8mdYY>d)9 zkL;2JqfsxU60l7Y^OkqVb6^_MRVktM)^qXm(a8C2yVlNhNjVSVD%8jqXGO(cAC3*ly zefHR=8HsRUxtX7wVl3m(g?|f9lGr7i#uTT|gZ~=RI$mkb7)Z5&%1#iBpGRFWCGnyc zua4XgbLR8yJAo;JW`^ivAWy=#foka|H71;&$Z9VqfHT;DrAW=N%qa zwosp%%2&g@i6PnLHgi8*rCn`zBFwq5(42akpOcPps!nqgfBiUq)n3jsEF#a~_HP6s zG0FX((3}{?K{PJ&N>hn(xsih}Z)MHXL4>}7G;$dQe{~iqc#P#3V69i8EYi|Z$>$5p zdzWLW0y)tYg8Y#hCAwL2OyQp!X~NZMT~m>+G(kdsF_I=Qpg5o96Q=UNs*IZ^NJ*fA zCZB$ljx;Wq(#$<&W>^hE#C%%1TXx9TT5M!Us4tF(W-!7McV4jtcliuRl%GRIy05V- z@sK0D2)jU{^GrjX{I<&9w|Z&G91~jv_Qyqsv(t@3`~5qc?6NvkLblgq0@_fjvsqs2 zw#*xaHi>vRJ=6xARN60~w!oy+@H%6fV`j{Mki||I!3!)=Xw=Q|K%*sLW=%y1+>85u zCo4Lk@Am^WMTL0uhm_k0%63m_QU#t%ZJ93?6s~*Q9TubkwYFoK;bjYxcO2GJ}N2@7NT zJO-Ft64v~?bV&5~_z44z?eH&J8bht^xp=xR=>>Z=&rN$S?cx6UukymwEZ%~XU1~|9 zT_xy8f-GA?x!1$izS{P?lv3Ewxx&g3S>a)=Zw&I7&~SYcH9w}%cfFt zR@^Tq&aU+QpikRV61&B_9~zz%%bYvZcW-AOC5v_`5KP5LwKf5M+i`9J@!`0NO7O6! 
z`qQn6>y#HYsx#{Fhfgw+QgdVBu zPp_3JiSLL$D}(g~`J+^&jkvNvSI@3#Kn_e-UI+p)OFqdaE*@ zjbS1$`-YKTjQ<^vv~kQu;BITd-PR_Q7fl8C!z4c1fhs*;tLEJ@Qv~ntn`Qg1=KjgA zr%}F=Vt@Net53j=s*?@HI$7WZsuv{;iIxsI59tV@nnR^qK?qg_R`6~4{aC}~cgXMF z>kK!>t-Dfq8_%&NIqhkxFKyji^kUQ{ltvhdvKydRMYndhnXwtXRFA+4y6FNeC$osN zw)A>-SbV2_xcW}rn*Bb*RTzU~F#551S4}A-az&w1&#ih>#U+Qcr}vfVY!MwoPJoY5 zj@?KcqvBiv&Vl0Y7%OqOB5ECRN)0c1|EOt0v)vGLY-A%&lXalVY zzL}buLXDEAr-F)8sTwOwXa|d=wWm4-92?W^T7Q3Ca-z}j`U8ICeeAgq@Wff1u4~~I zAp9GYbV`nEV;nP)zB#eOegMzKJjm1t)3{9TwA z`%Jd@dj}baL{+qOqgQ?4lo5#?`^l>vto0?$*O{fYsSN=BKKp%z^N~>yK{YwoSUsZf z+1}a9RHtvfxK#wy&uU1j$26dJ+-cg>7Z1F@Gw2rnvJSWF@ zQ?3)S!f2Pmu*07@xpz8Qs=adPJhetfN{Da#!`YMEp2q!F7=g5R{z9OY>) z?`?!^fH>K92Kybk80>x75<(NhU+uj%XM&aMmbWy_?!=7)#Al}F4 z12iPN1Om^pTn&qz|LgvjRpTWttCz2xe#$m#8p)ge2bZ$ft7BKl`*X=@tn*j=NM5wn zHZfV$M7Vsl6}WO%rTfxD>HV(wiga{49qsR5^1SMMiF?^yeHml=eDP@4O3W6CVnx2q z#?^0dZ;EM;J{7J>b1rT_s-L5Fqy*J|d?$(2L^YlqX_!vLdKa*xVIg^)&v28!5b109 z`2!kUwNqn}WdDlSjmr)$o;KXy`vqRxck%o{6axg}Mr?%~ZhhuCGZ~hg=&lg=YjOgi zXu7Se-P(ZpR>dYCnC3v2LeId2p+pzg5oC{BoS0<1_rkga zf5=B8CtEj)a4z{~<)#3_E-Qc=BzAAJx12kA4R!yf6OQf*xf62^PR{LP{5QuZQQvkL zZM+LwkJ!olv#`m0oP9FjW4h&YPC5t^-ysQ}S`yaD8Bx~1?ry>nx`Tpuu{064_9Ng= zXR6Q^u6u-;HResM9h4f`n9W4t6Xq53~EAMwz&!sh&x;(nq z^LPDX9W^hkD>DUhk4o{?AymPMOPtWF$olsJ<*F6wdJnyKb$koG*IHle3fyukZ7Toy zdnwZlHq7ZLNQl@P_jMFKkgPQz@1l&?0ASVy^1q^tvSfAMfm1d25R)9SQ<~PZvBc*( z>ECVQQwL7J%k(4g+%nm!(`3m|vs-zWzTGrIWhzhOWq!-kv1^y~H&1VsSdJceS9+LV zHYd08-7tPLcr!WjJ5P>TtD;{v=|NWD(~&?^z=&R0!gE%XVpqIpf=2Xp@5OA?us>&0 zLct(58QcmMqK|Q_0aEIum!NHG%L=`MeT>)}luI2f+iv!_Soa=`5h&991AxKTWAo#$*V2pk~v_owFk zAPk7k9UJ6IthFegv$Jwd=S+N1LhrXx*Ssv5tC(FMWXoZ1!_rGZ?Q~_o>H36{A3o8+ z(!*0AUf0dIwofmOp6Ki)8lBOe#HZkyBabNa&_OFxg{$mW#Y>L$z@6><8f@HIN5w{B zVNkZ^N0>8?s%%rVDe5L+RCfe;ix{$c!J%3+g@zov{D)pvDameA{3qvgsQfs>pAl!fiUsB4aCC2?i~ZG2X7uD5mw@Dh1Q;8Aw~w9g?Wq^t z$1g*4yHXc#hxUwceneqvUyGM)!~jylY7t&|1O z5|pavByU?4C&uq>l`Ml&63bBapS+WTtuHVyDh1ins2eJ*yDrTviSO8MI+Wh>S)bmO z^kRsZyuaZ-2dBXrZyv44@0DSU;reH1Tpha`U0Xf4)R64#BLMcrY9xqX^(CirIdZ7< 
zhY<<`2orTKQ2~NRKTsK*C*}ls2(FMvXEUsZ&CAlLQhS`b3Q{A>dh_fP-k5Kl8#y8G z++u}yGm)6KfopeCO_ma4(u?uQijsESv?P>q6p(w8zqanJZ|G-bI_;#Vii#B(d@;MT zvaU9_MlH*&5E>0ln!GM6%rxjml(TKV&9we>du<1(XD(>iSaWO`E-op{rpgPiN8o}6 zU-_@P<`eCTvM)UI#D9lsR3VS!O>0BD3F0BlNMc8U`h-(|a5 zPY37QdWGbvHXf-`q>F>-evfIN*1uCQ;#dG$771Mt2|lM@aa&nrjs3&Zt?M@@+Wj5$ zccjkmD*Y?jU<=OKJ=qL^>UTYA7QC1J8p({MU{R%BU(J}0GYFr`)QLQ^@CN_tY#P7c zKwrcCs5l>rERMOixwiC=?(fTHpO3gjpO*yRYY1~-rAqI-Xtw|Mol0(-8yzk>=<H zlF$1p%?9(O-Z`tR+{lYFz9n+Gp21@ zoVAAN!%*rWVLWZPjfOoUbZ!fwPU%7wfh6+x$Y5Yjxv9Nf<>PXk)2 z_p?=@Z0d6T^7O-VEDSo8x>+cRo?j;va0DYK3XUSX$xuK-&UymyjXb5cJk2D>`lJQ2FCYe={~ z&POpE*KUx&EAv$88Nz5962}-c_}*Rk8L1&I=pM(()7{RNqZynOlD8W+JSXMc5n9B_ zIU2W-+>V>T=$nuG-9lVwmsW147OUia{x#oZvDr~CN@OX_|Hj!HV$wsz!rxj}$rOFv zR;lowm{?3=ePHw0FX;z^D(*`q@4dV$jH8Oi42AMK%+2=JJ*XUkO z`Lj#q*_`6kyOR~tVLZu9BDziC`{8*et@~Qg@upc5E?weX(Q&&oJRyPG!V1on{3JUa zC!g2?cilO^evnw@10d}yY;kYvO0PM3@6L>`PHU(BDy`k(Hmy19$p9R+|$f?CIJaa?ZXpRY||!JdBu=3MarsycF?*(fAkz11NMz)w$iKXAl4mY0WM``JKU)<5| z3B=9|6vfrPnyY3*9CmgxF%l*E_u({2UHLNjS5;Wp<>j4f_{k>z7Z_D0AMI#e-V`U^~IugQh>mn<+zw=`mnLlAtwlyXnKF}`FC(x)cs;v`LscHO^&vw+80LO zLj!@(&a$8CNB;SMl98(3ST!p6eb&oP0}SDJY~j*am1AQX2XB^N&} zmlFjTsd>pJ%gZy;ex1+ppu4*SEl{3oex|wWS3aWs;&O;Ziq_i7JtAd~qcOZ)8Q-db zk4UL}$%kwOb>GM8sq?zV`*#T(8H+B~_lOQ${E>gj-rMAl7$SWxf3&tx0^4(E30`A5 z#p;eXZ1>Lp^en&b0!G6k7bh_2kn~Q2QO0DR-N@tK9_#)`clh|_y{mxfdS(_UVmKyt zRE&L2*YGAEHvm7a8=*mYtzFHzJ0skhK9?bH)^hz=*xmiwp9`Y{^^15!IRe1C5w6sI}7N0d#3xbC+opaqS_9aM)g|T20EQ(vUXhFC8qEn)d!y^_AJfb74 zUOt@TJRu^^&2Km2O788qJ1!V&baU>UM)D`}Ous_ieM)QhXIK59g7;GQ5+rmH(rcRh zzA}n)hQ017w87m*FM>3JNhao>H?W(wE9I=t>=spZ&ZtV;l|Lvj0)OBPy9K>Ho~?D* zvMn4EJ{ARiM{~rw6NK3Nx#2MH;4-T zP;K8%?U8beXR6c4g99w_+_-b2R|74iBOw5EN3|X}TCY-wa0{K#>oI4vU9oSCyPD_61X^GEMo~o+uMNTk5Hppx4 z4Ti(f%8ki!t-B)dR6-ta7ZfRlPb28FDL~>e)zR;1*3mkH^dHJ#( zD~ft(NHeY+ApvCnk`!&*`|Z3|%3HhHj72EUv*%p%Cg~`cF8w0T&_oVUGe0D>|)llgn0Fqhqw}bjfT53yQ zI^E^D{nGn_d_j(&wPO>N!_G6Vyjgs?X-oMbG>0QE?$dbd{x_xN3tdYkFk(sc*7j~r 
zUCWkRXX}k06eN-S%r0mWib@j{sMYwjRB1T-W$b8Tw`?IC*k|uv`vW(JYX|Vay7{QO zBKSldemmxa)ezTW8G{73K2EURI*Ko88Bn|(t(Wl zmC}Qxap?=oa@7=<^XZ4*g;nB6r#NXVY-v{sroZ*RL&@Z$CceQUt3--tVsx;1w$K8z z!EMX__jKqhAECAvmd_bJq_97?74DbO(%?Zl>zD1Dd;`9GY=0I@eNwQt7Fa|Hi7!Dw z=7S5QDA#pIDZ6frun>!Q4Uf&GB!F2L<-sk~~mTY=$oq zx0&5vao>1pPcQnTdr*`zv!Cx=@pGR9MAzHv_2#eU`q=MI(~PqdWbN;Ar58hs5c12` zyOu+$I8YpN!h}AsaQ3pu(bX#ial`q*6%f3EY5J}j9VD{|bLj&n+1+1YBGNpwYzIHp zke0>jx%jaE&MGHIIZx-|HBeuPmwFrsGAjG=&nG7=Syivef%d~}68<6a?JDnT_(3Np z_athF$^KGzYWSDcY`d`~slDogpRiQ-B+BJOBlXSUMt)Kui605}3VBc>5dVH)h-K>P zS6t$ToaM(+ZfBp8Y_W|Y@V(^Impw;~F*;vyZn6r~XKwQbKHs@y7K~EBGH=k@2SdZ9 z_Eu`MyTvRfvGRl&tGk43r2nm8&yoAkVRt6s=PyRGXCF?#x!0iPkW#fhJs(*TIQwY+ zSTIg>2C7G5-Fs)BFM0bV=sa>|rT!so$%EjR-|}N_g`tfvs986>ZCaI*D*ASz7^?TS z-G`02^9=QerM5iFAm*!6zAuG+xpvs2UTL~E+ojZeuxWg|`sM|p&)SOq3CKvLa^jXC zgWb!8;-KS2Uh4`sM$JDHrBdt((O6z?!G5Z0ZMlUJL9Sa-m&OIdZqTZ`M28loBjd9l z*tY76%-o>SUj;Vq<*y9nd#h!a+NlmYIQ>^Qx$gB-mgZMZ-D0VB7$Mv$4FCFUw(XH( zxm|*l1lNc>(~Oxa|$s^@_Lq{U2ZthlXELsmJ6?Y1qN@!M)$)e$Y`?(qHeo z@!+vY&JQZHV=`d?wbucpi!(FX1RW|$V&3Ie`G(3y%SPCcW#?sAHMKaVgy2hSGH#UY zP+VQ;Mx8;%HvxET(o=|t=`&qmGq)SRv<|TF+XKRiVr+wvzSskp; z6Z5$Q@-xkm88eFfgAdelEVwuoe~#R`wuaLGUL=j>#!{{sw5dSyy_6rEyg5LIx#=u+ z#-y82apbvOQyE%rYG2ugxWeo$}MHV%wh{nL;~FHycr_AF*W~ zR5X66!`aYC=*b zq^i_<9c>W(o+F+u)PrsGWF&TI)3kBTLz?d}JX_H@ejrQB)0iIeAbhtFQ+?g&K3ZJV zu|7zVI#Q;c_Jlp;s7b8P=_6?WZi^Hyns4=bnqbXhLsbiW{#uOf%6h#sl2ozk@_}A&O*8jt_as5*NWO{RkZ(@7L#jGa zGsSe5K_)U|u%Z`N+)QQ91<9$4O?^=8b>~UB5{SAZHq0$7G~}l=+fga)()#fJoAXzM z{;2JH9G~Jyp_kdmzHwozw|Abf-7zrY`^<^{Qo~?-NoJWcq_-w&73==oFv*c=v-_2q zmvWHR65FoF18XYC7a8@Zzw||Fyh=xQ0)$~_w(ssn`MxFOBXTjB>xT-h$#VC$tu~oL zb_g-wQr>pFEXpo&j!Kd;Syoun4F2hE8-5`#t2#Z=pUb27f)Q@c;Smv1O7}2U#B)=4Fb-cyub6?%B@-ocoG*@B^^GQaP zJ>)NWa9fa{A?g0w$qGOvw6)y$*hd&=%gyzzm7rsIMBQ@b_jBI9e1_DOK*7tD1HKas z{O4T4CV5tJQyo|WAF4Rtj!;D{sL>58qNoNw?+T`zE4w<8ZKEZSaZ zTrP&W8)@$;SGa_4ci*9X=EQ0CjJn?AsMbTAnxh*es$1SmiC@MM@^E6-`tLX`^Nyym zteDGvC2ye7{?w4sqL*5=gM1HzKRK`+3*#;EPg(KOt#AyIN&7ssP#K0#hCiXIExIM2 
zP$_%M3<8ok8{<{{oTA_(wJqA{H}VYMe(agVH@^2Ei6=E~-lXq2d-_M&En-8J@%hBF6y0gFHiBlTR`7>LU$&x~ zVQ*WOTH6N{*UBpV zCV0_QT#oC+r7|w;M3lNx_^z&1=3Ms)^Zktyd! zRcOb0kJi0M1FXn&Hm0ja#a+sod|s#*#4Y(KdEP9ec)dN_UfyBX%j(2o#ra5TG>z_+-dk`rXeUG~NC2IiN^9w%lZ_rc4ky znM(0kLmT+%p-R-Cr80 z_bbOnK;gKBgP-H-K;#44zma!a<5EzTT;$gNmsXDrzB6iq8ju+F(H!MwgRicAz_WJ=p-=W{Qw?xOHsya!$uBh+(aHC0BfAG4k9pN@lzSx)iNS|>*c($Ux= zFX=G%K%X;o+1mfegS3TkQz29Bf-q!Cna4>yX>+5%u7@>cI;I?q?#baPFE zsXTxD$6X)v1^BVJ!m{+?IR9eAnK0$Hh%+5y3g^%(s=ab zN{zt7(`B>4(EaLw*)H!%`*c558WNUt47V8Rof^vqNRoK#xCJganCS0T7wvJ3^q3~} z*L-i<3uA=lAM3U_clu&MInh9z1bcjHU+28&hYcC&?HiLm{w|#YE89nq4oI`Xz!K36 z+}}Cd4Mx%*#y-Sve;^{Lx3fFJTOLQ1!&5` z4x?bAqz?$#aTjblc(jn`n~%Gm_$Lbzj&y>+S8BCv$9hZ$4U%IQ_Z9#QE=d}8MbAwU zrfbf6_U|1Yz(@xcaJ(4&>rS4t-REOnP;V-k06RmN?z29{Jzq z8DWP%>O!~1Lq8w)L1(w)0$lYuKPXLw&eK_&oqm|KKKW(Mvg}Koh7qJmACCKEiWv@a z-F$M$q}EcPVT&0GbKOF_qoCSur$kH4o~07*Xo&G-=(BS~({bN%aqLL0n<3{P4UX{@ z1VFKE{1^iV@Ao1`1+7&88ohrj)&<}J^&z-AUy$e8rfa(Qd;shqpbs&&=rf|Zhr$-e z_I?J84qA>k?mw}!-nX}|t!A`bzo%bXnIQe8#hKD)!wcYGjsdD;%{y6&411THQ$_1S zmrXIe)~jkyb9g` zbWEGSd0<-KyNAxdW^*U&I9dR;K*UJXZ&K?)i^s+w>3A?YSdbu$Vc8~beDar>djjyXd8+rw`Uo2UrGE!t2?7YI4Ki0U(ZO%ZR?R6OiIr;#VEC-R;WD@l#VDMfy_Pj?H|h=m#ow}X;c*At%LFuVGg zhfCa*FJ?C09a#GPGU-Gl#*cvC!3$1L$v3~879dzTgMg7L4n|mmzcb|-)v$DH&EnJQ zTu(L!PhArYhpo45c=v6Ta34@Qvm`+CC(Yv1KDqguh)AcwA=cxZK|+pg0go<09e(Y^ zvPvb5HB@QpkbJe?AAfZpsIDcemL1YNs}tnt@Xoyz@h21UGlWnylg@QYW4M@GI~gD) zMN*_+#{+7m{#7BvS#n3@j>K02SSsQpHGuA0efZ?DDiEbWH^_|EXz!>+!dPAFH+gBR z;{`UhBc=n)t& znv9Chg&?54wdElrN&yo2VF`iem5wO(-AYaHyL31gW;aE6&{Ls05*vLWod z*atKro#Iwid)xrxqf=J9OJ0&IzWmfXPTY>pYpMu@y%2I10Kve%0wZS6LPOac~d&sEPe0)RJ)ru>d0@2uaZM?;4@NKyLe&H#KD1xjcB=hQ$?=NIq!_rd*%%V zd?zHt3F`Kyx8jakJvPWtlXu4l>I8#$IZ5$O+B6R&*mfPlp^%270;%tMB!ot=O0ED_AkTRhv&uKmfb z_?jh5-%lTzX}$ZNZ3`L+mR$qUA&0~`M`}f}k()F$dzaSluPK?yhmJ+g9*Vtb4{ zN)Yj?qU*uH5(_iUQPPUz5<@v6Hx>dpu_u@8B!-LU9v3vfUCaq#L+>nY?TVSk>I1%v zAsXC@EpRVvF<1&$!u1C8vqq-)fV`~5E;24L$nKX8-p({*(@h#0=dp9jO4Cv!{ft+m&4Pbrd^l 
zU(ho1X#a+T=1klH3=m@(8v|r-$cNmK&3myM(Lbd+LIl+ZMF3Kf+&#~Pm<`Bgq4otS zL+pijatuI<#9^%S1d3^zo?3t8$ZU1@6X&0z0xlS3rq>J~#cO?f_Z1)CVL*uYb+5Lo z(AQBGC%L-|M@|{0eou_g4|(N=r!RapjmXx)vJV;jNDkvxTwV8!`}tIuJ#26(RwG8X zJ`)zt;(oOmaTtv-1BUR`KZC(z)&8@6x1$0qWqu5`4*)KXa}_wp`NNuX_AV+3l{Y-^ z(y({c^flxaUgj8NR17SaxL%)$PMzH_!Urp0rJN3v&vi8qAKcUv_t*-{yJ5#qJYD_&p4;cgkE>-Vbs@F9K z3V^+8oGwAz_y{OCi%`8wti)$KlsPVGi`3pB{!wyJx6(Zc`IJCk2I;-*{#2^gp{_X$I+rhd22ijYXC)cTWWUTU_*s&HmMNkPThgaQ&@P`j z3FU&<;4InV#M`%mLIAWXeNJJn9K#O&$rlC41#DQI^L#*ZYgsN6Te$J6 zUF99ZA(PrRlTui>mrb?=0Rj4=t@ez2IIX>7RMpDg6I5FA6UQV%zt;3;N}+B&yuQp% z6{x~1t@v;;#@U4`2M%}@^EO`eOOfZhH#-KY=XgT7TZRlRsi5nM2K#-^>nYh%47RVrn-@(#rJ@vPK9U1`a zY6n);uFxHlD&3+;N?2n1K>3db^-D&^xnj$@Io2jHET&;HD?4-DDAecFG$hY0+4Sp| zJW{SpNgvg{oMhSh2Hba?kap&+&DusryAWskv_Va&yQ;zZ4Jq)ewITVB3yWx;SbC1b01SmnISd@Z2Jb@sZT?ESo zkh8yg2+Jf^#WQE5l2p{{XoYkkoXFQcsV-rdPBX957YYdW%7hwDf>Ov-jG-|udat1I zbuE`09o|qC2!BSb-fYp^f*O~0s6MYiB0F9yM`Zu5Pb_qMK?Nz<_EObqdiOOpGz!|} zT#&2%MVu+yZyOcRTJ0pAaPSD=)6uI>Efwzi$S7zNIZr?)i_s|v}4bH|F^l9PIP+T+v!dK;b)M?>+jp)ajJHKwuL*6>_Q`F+hh zRW87+TnxgBut0ww3snntzo8c7Hj)!qII9vcu;!}D)+rOGe5skPPyTprkG!Okg`Fv8JWoeLBqkHnv6n$AA@sJ9VzT?D?42gT_CL zo)+KMrp&#am>Dxd4{fq=B zdcUJlIAufjPa@cBJ;H8CVvmrtvRigB7BdpCvhpSb`s<^W$u4Mzu&Y$gijK&EaE+J0 zlt(3M3{3aeI^%{(ZB>St`*lMlJ^rf^PIr_NI0se(D$tJ}v&&rjjM=LQNQHk&3(q!g zlMA`fchTKI8xnrNAvbvR=OZiXY#vd4D(@_+R@FR?JIF9cuJE}{K0MSW{9@LS+NC2t z04}7dc#(=~^c=F}HAWvswJ1ZSv_v_(WTz){$J77nn+l5wR&~ah<%bM89*S}NC(^rx zb<1LYbk4_an0>d;kdHDvDMVa0MPk!?PoMM~b#DA~P1oxQZh*=d z>cuKX;7>3Q6$II;hcwC(uKQV^2)`kyxR>FpH^sUxI!&da22DS`wa%2nA5-?oP1<_3 zOQ<_`@^=&95H$R{+|5LCNi3x-#aP)s>{t=uUv6Z&=*;Y_-H;xm+ZqJSA?v zh3fIa8n06_Z+3BgX2mLK?)WL(aZ*@)R@o%Hmqa7lQP_ zVXL!-4L(BcFe2Gm@cr@ljJ)3~& z-ctkwa}cuR%8x0umND*N!|I^1rzYV#$$qMaFGu-wJ05-vNc(AQ2ct`KxBxVVna7RK z^ai0o=u&J(3YR_%@9{Vp^UzmajZiHOw2X3z+F{|Ea=zvO*c+8FSANtJ&(z4$6Q@{4 z$Yq1`MSx&dO$BJGK9~aQIMX<PJbDjkjV~v&HP4+} z{#aPd@J;}xXU|6HH$3hzWZtmU=gdv6JnpTO2=&2V=H0&Bi3piY4>3=?$h&u+_hu$o 
zQFuhte7lt_`IhsyK(w9Ry#d7Rq~i>{BRVh#y3GdLeuh32CE4GY1bj1}q0OXvX?rz7 zAwd&L7y_yX$RQ%qaf;8;w{oFVbs-jn=b>EDC8~R6or+zVxKkiCu(`1JsiGO*+jDdL ziSZ{N*S~ovFf13mBy7FSVl7Mf0n`-%Li~ovsP2Qu$@@inH3gz!jcDeh z6m=1-Q-!(eQ-8>-Ks5Nw7I?iD*v`R?YTmB)74YH`FVPRSErBF`n?1|FV+2T3RvSP(_d-J zmaHO{d;kcFbYdd!TEh34x2#&TRb`1^%y1FDB@F%En2-aOsY18gkA0pfSRd*GhOEIq z$I8CrNH|HPo($2CCjtZ(MzzJ^0 z!uxMA->w%icD-KIMM!UXtygl{p;tFKLo$w0Kmutn6S~PqPG=SrsG34H23EI8*!%}04_QO?z!hQG1#epf@k!fhBomI3?6CvPXG z=O6@gkcyLkci^qRZH13(4RYVj=u&pT@7Ez(x;>ci7l&hO1)P60^aj!f{rMy>yc^pB z^`^qP|`81qPN_72^rO`p*u?W1oi}TPe1%9mLr_vi(dqhS$ zc8eccT5hZCs*hj{LkITR`<$+BxZIhAb#VtU+g~473sObELoBG)g;G?w9EqJ}i^gL~ zrrX)2q!1uQ6nvjrW&5q6#5#9Mfo0L+U)0my3*@*$t8035RO8u@rQ=pWr4r0eLu{i_ zHCpn3y64?UtTC`VU*mPSC5Blx`|5`BSPxBE)<5yy{gD{CRJn9eV|V!3*hmYk&q7h<6nY_o5;HGF6`$D-@d_%>6!v1d(73{0AdD1_W z@Vnau4`hUG9C0)1;2?tBvHK8W-+zMK6<%M5f!@dO(K z+1@pDs9SjpKIV*lT1-*$Ye%yCjeck%0YCU;30XS9iTX6Iz$*IsgO$tH&4q@ zw5VJ!MFM1fKV-PYyWLpibu)2eMKK{CWPVFDzpz%xeq{=q^Pw`g)fCBuRe}8CwG-xY zjO#p_J!oyOu#daem9nU7CEKAFcSq2p2oZ}>8EnwX>&Dr!`XXVOmq&=vLOLfv<>kag zHZ2#~9Qa;id_vU??C3sAnc@7pX|jPbnM1l>95f))$2~or+v`eG0tBf`Mac&1axc!7 zFjUhUh#z$-26+3oSR`eF>a!Ckx$DJ~lsFoU)0&JjRSE{VWBTdaJ#2V0^dpqi8Z45` zuQVv7U=-bQm;+3`1H`HYyt|xvH+ci*zv};fxY7h@d2Px1P*isddBGl_A7z5CL{FZp zk4RRUYFJF!-=@T9N*>n)Dql{G^cacRIK)ThdTr#HrkH)WvcOs}Ma zl>@6;#Ye?yLg+6OQI z5$yv>HCoNT{Q$rMRA?xh&ow;vTW0;+@TU(cevd3l(nbo%C!e`Xp<2d_mp7x3p>4$l z3sS{9E&VKaje1&Md4jHV3-MJAoMwwR?*JUXWC&T6ge)ZI^T5sDoR~wTzI3x3)w9n(^AKrwzXs z+L-woGkkErLI3=!{SX(IBfyAMz3}B?{P6OM;BeSt&`%9EmaZDQI8H28eDSsO88yOg z4<7p@(*t6f9~NS@6x9EgTLA`q#o(j&Lg8a025zLAM2kIJ7jEDeFv)`2gwO`t#jK|F<#_h}ncyUc! zTrNKb?`?|zWZRX&;=lp>iV3AtTBQ`AIrOasbW`jJ4H z2k~nnE$v_ZH+GIm%?e=sBxk!fmIa<%1C&KO%{aDMlE{5XBU3FN7T=!{w}VouJpWjz z-slAl=qFRmc?ZGDY?VM)d6=;$$nXMRhW96J4D_%hW~RH z)Sy3tPb*Kj4fhQB$l-(4&2Vu*m)KDcsJ(<%oG)LZrgeCS72`@;(V86`xIuoZU>8XJ zONOd#-?|v+_y!lV7K<;g`}RM?9IVMD2|26CSTI29TX%ACfFlc8W_#(Kc!enG`rpAQ z^cj+Osoyv{^DvecA=R|`wFf9c1G}nUR*s(4LR)7&g8D3ktq=UHO{qFB#q6&@5Dgi! 
zCP_sVPT*#81>HdqTTJOLA2``(1^!Eh!E+}?b`0F00+WDprSW#rnY@Q>0L3$~Lr^## zD9OK@+@^r{0f9LE52qkZa zM~}DGX0(`xlN)(HgRmpsx*-0pDc2LQn$-^lYBJ;I;A=W9MtM*YP{v=HYmeyM07^JE zc%adD=aNZU9H`0`9 z*&VjE{8uCV{p+kv4bdH@KElFIu>xnd0A1DfB{9RG-lcN@)PISr%ug`YNoowj9L+)+ z<|8@Z`SC=Zyb@8i`IjOz`KJH-C0DdqTiokM{ulkaHm83>bZ#QYya!lfH2y? z|N2^zX8Zr+Bu{}6_@BQmx-SEC$bY^$EXqDD@tz&|pK0>XEY>OfXAS)43G&Ya?w{?!Kf9NIwgmrd3I0FX*cWj; zfo^vu167^h85-npL{917!yIS*(Q_lH>M#0xKIE*ua{w9nL;5;J7UQV+8>tX){CgNv z6!#mjckdw!z_lulg;CN7i{u5`67G?YJevL=!IGaZ;7^p|GIbHK#-2o@(dHF^b_`yc z{LLbBE{NL`x73aQiI0kI226W(rMe{dXs}tzH2_|XeWqFCfT~E9%h<|~C`LmAz;pwj zN)C~n&!hf5ih4j3FdO99P|Gdv{`7AU#xVFF0h(exuJx$*ThB90Yshle-vf~2)c-m4 zX!{EwBK6iMzf&XnA0J&{1WbCC0QBgTxqIVUFJy(=Xo-BQutyFh48YG3)!$ZY2jDb1NBOG5Qdx`v^_pr~h7y*S(Y4Iy zp@0rwD#`nku5?RHAfx(M%W1&gy^}<1@EJ|>ho^RunK?V$W@BG+i>*)K%S^I?Vj0v= zB9s%ic4aANBN&==C+Z>#3CljZ#kAIOT6AlbnM>a&phA}@B39b4#-)R?Wb>RY@_0;6 zbNuJUYz28evPd&`ct7)ys;&6U;&gT1-Y0K2C8uw{Lrl&mZCz>3a?p;e79Qspt(B6m z7yBaL2)L^CezPeuf7K))Q>UBYIlOmxmxhr?Vz~9Xk%CrZl^PYS&iNkfOQpX0fI@4v z%Ozz89Xj&(TeSPP$ht@+mLqJZZ9tC6E$f?qW#h^IQ~<@(%&hlP!6TJDWNzG#}-Mln6_3t|HMW)N5c_;z^^7k6Y;_Y5BC-iT};_Q1yNJ+X zN~!TUfj4sJ`tks>?lOX>6O5nxRsIxx-L-mM2?yMW<=NUzA0C+y063lQGIgk;P&?Rq zkZE2IpaEp29AQ4k70TS@;Wa)X@rH4BiGxo4{^i3_A(a>AP$TdSF+&)|w?&*zdzXp4 z$Dsv%3hoM(+JAK?y5Z61>qb76{Fn9scVY?an}RYo8Pf)~^VsWqW18H` z!`%wW1cN72{Hrs9}KjZtQrC$0I zWb1_mv1>AiHVG9G|8Xg7H0V!2s`J1`V4M9QlS7Hx3|sACYTnU6P9HiG{yoi7<0zZ$p1l~a*7{`X#YHZ8ql$qQVAkew zkryEeFU>{%@a(C$`>Gp%KBQYIHsE{3`%I$~X%_uyg;AIh)*&U_HpjgY*PehGKhuWV zEubV*t}If{>QHI$8*qN}{){#o{(F3$m@Z3`b0rn%`5#fh zjZv9Z$Nybbq<{2Acs1o{5niAvf*n;<`$p}N%lx12{GEyGNMLa*>=Z12(8WeqnZ8_{ z?T+4?v8(CbO?>ZHZ$)0-fLGZ?5D`2u3k3kQX)fyvz~ayDX1%>u0#^m}Nwf*;q7HEm z@TiO4ai9kQnf%Icaoo0#HcVXThJHW)?^Uu$AIPeXeFq){U&KA7tOIjWk)3v6)iRK* zWOG`Q@4!_d>3^NeS-KIM_d*yl8)!#(&Itm0xVdeunP(rpk0&(mpv0}YHEzi|bEE^H zpcI&;RGbI~KDZFWyyR+4uwBO99$zNUgE~H)q_LUnBD+4{?>MWlPk*{Ecm`0f1KRaE z>BH6d>U)_AXM-Pe$4@%gL{f-rccD9c6?|8Xwn?%oJVt--SSKm%Ek}+W-|_$i^W;>+ z($c%<+A9f)r_s~ZD2e&Q+xm4@kwH%0 
z_{l^0`GWNt%Ss8m765)XCi;OsAdRm{yrTa7laU)i{a~%;BNrwfKX8h^VR{4&BP$89 zlI8M_^!Hg(w5-EKy%e1$Qmh05@~Ol^UX^_~h-n{~#ulwtE9H_wAU~~fYgkAFxGCgl zKhkqv&c^G^k{!;=PS}S)7Xc1; z*JrB%TJZwO&epS9GlF^(n5&Ol(a26?Hook|5L4?AKf=AfIiOv5DeZzjM2@5A|K@jr zr1gIdj$aFH;U^C7r0VCwfyX~m9G+X}oMuZsZ5qbWpG8F2!3bp~d5}}WV#`?{~|y0tiYEk=94 zx=Z^b56wOkcD5^NSIR1W6L{jc%+)W!QlJV`Ey*LlkMYzulzqH@;EmLJ1{C)@z$Bp} z9_uHP5qERcYp(sT_SAi;8E7me9@@n zS7QYPEcqa#*kqNll!lyUC&$nNWy<3?gS~U-Y$|C19<6Ig8Ur zDq2EAe5lB)pH8uU8TNMn=(UXvY>SNX<1jE7&oOgPg6Ho5t*HdXnvj_MtF+u3mphj1 zBVRnnJLlBrO-=9#m8NxL@pt7^nNIFLc+i<3^h1*K+v!^-YG9;Y+e(i@Tomz?8p_-0 z8Zp%V@c^7*MmTG_v>sL#oshG+{RF|e+&h;V#n~(UX`KlQrclx)Jy3X^Me33|uO2nO z?mKR6wuqplVkJe&PZTM>`A-2aZLH1h* znmST)urd~ZOF%=Ze=;EZ2g>XlmyXX5;{(l;rpj`cWIHVaO%s*p6`yr!h(uhuMd_9UG{dryck*Sq>RgEKR9c&BLHiza(n1z5fQuU}q? z2NetddMklISj(+oFIAGG28_rIkB zYG+4@0W9Ntyd0yp89H!{UKvMy{(ysNN_MWRCB)@kC%C9kd4o8;@VXAV^%B6}{lG@4 zfQh=s&+9sh*|iu%8=LhDk@(jcjYX9jY=UXU*IkU#$keFZQ|JCVK6L3`xh+|WXX1QT z$7vIYBmynwc^%=4PaNyj6owTM9`@Yavn^3=K0sw5J#8V!^zBw=&Kx~WW%^;f4g_q} zg_s|(kHv-+!U~kHvO%8R`H(#rtX( zj<&?YBQ;Vy{q1lkMc(o%8aE~ne-HAxUn|9C7O}KTH?gs|)FxnvC9k+4*QSbMeE90y zrO=-s*imE0-V~~0kLNT_!&B1hB+~hePa57cBkUS-L!)%ojp22X+e?O}{{46FqFZR1YQ<&f#a%j5s*vDst6ZSB$-SahQIX z3M}aQU;dPyWF^Sxq}_zRu7_!27(K1yCt~5#lpuw-W~s-KT=~E}%XpcToW-4sJMc%X zW<0?kzE)-nk-FO`hkXyba;ro#ia39*KpAB8AQkzx6epeA_yIRP9v4^*jK2o{f8qMv zWvOyCz5KBcXY-z28e@%BlNLt~9|5d!a@H!|opga4a^E+cICn??WL8I1(ZfS)biAVo zgd@XLhMyPo8q+N!4MLTFy>^^T=&4gJTt+ld$98s9fbXXWI z`|?)%7ENwUFBZAs4H-C-pkwl|?STUxBpW#NGq&V`t3h} zhHv&)W!7SlZTQi_xWA}%v;=ApIcSR6Jle6xC~eG;z3DSBjF^Qr`}OPMNc!lp5C)BT zL`jR6Turbd@=gXy1-zBcD$7}V-`TDPOK>Tl%!&xF!EN$;51+9}O5UDM-`jjb{oa%> z>q*Ceo^QN5@+znly86SO4sP>e5w|w|XYKOS{jb;P5W^h7N%8kn==DX=9&jdSvY{Wm z%%TI*b+&4K;MC2>;?_=PdE8U!2Or7mIJ>y~ZMFHhLspC8kkeAh^jw)Y= zh*_cdQT5lDPXo>gDvucW;O9?_qQC;40;h>(g{oVMF3gAg3cwtbu*sU5rv_jt--?&` zo;o%@3}f&ROv;v5{RRUHK=X~%x}@;DGDhj0oAEn^(Ry#b-!WeeM&s_>9{4#GoPQy` z!e&V>r!E5*5~gl8DA<_RdvsOO)iCpMc9!`W41_Ch2*jb#YaD_rj@>ynIA 
zn~LWvUa2X20EtTkj%Lmr@YJLnmGTx}t7CM0TP+XA}UEp?AN2voGyOaI+g&sDzGavvbV2v}_1?>@8nokUnW)h5vBHu+m9w@UZJ0 zmVsl>k~UfLaiKZWEjnOHw553WRHMpHNbv!a@R3$pmF$i|>5eTY278>rPJYtg94u z*C@XY*$~)-$F2}YIYq@}e@Wk2XdR*XO%D{L3vG&L6^NgV+Iec`8z@-v`asaXniD@* zqaXKHP-jS=Ob@Z_Jx>apJo$JRA||Jz{p|g$C#Rv`G)`EK`apK^V>m;cWflNb_`^kZ z`>}_>d&8rN**6}%)0L!G6oWo}IDnImJe`0~JN3edZgmI2&l^MWaXe0_E%@-LV(c8G zwezwBxh$Wq$&a`(k(E_>B)C8ai6+^q0WjzsslY2A*q!$Q$UPZ4P+xG*q9M+sDmZ)rAuH{W-sGy355i~=Ux$6J*8?-$^v5;`$E&y!%$Ve4qf>W&?pGXdj zPD)-cDZ|;@K0gxawWb1&6C3orjcDtU+<5@}c;_)?4pPTvWCc4;%x410P9=td7u>}d z9U0y(FeOh5Uh98*y-LSyzmvs*oll@>m#b0y>#{pOo1JmE_zsGBXX-AjFjV;DS-|e_ zBWT#kRZ{>uLp9(2GoUA#nZz^su9HQfyadLPh!9DmT&*5_K;A>Osv1?yr z6d_}9V16zDU{a>23hk+ zDX7s6Zi=`H1m#0$`WrX|$j4_yR$zw%VS31J^EcJ=?s&3 ziBY>|c-c7Y;sByXCqy1$5MjkhL1S-x%$XWeLAxwK7pE9FB!pE((4$g3g)D zaw##TxavE0KgjWi^6O4?0h23I`9##MrS4=2s@>Xp2}1&knk3H!`xj%}tPiGhBc#t_ zSZNU9_tiuRlztq#6MVE3K#+bm?GqG+CYIVqrfKizj7-Y%1}(7q#z|Ic-!!=oNX9wH zu3+{GzhCgp@EaKbhn6uA)F8%l8|D=#aLkSmiYGL`?Sbm+9(6yL!MD@e1O}ALI=f4n zm>^;4Nkbt)qrX7z1^@18C@UE4=D+_&u|I$w2uDOcPJ-~<*%w0g_@hu-y#I%SbP4`v zY)6&Iy*3R=YB04rmUUtO-gMpXDS@5htU$d961E`xvPn9fT86=6QvATrbQr-(?1Cs1J0~Mn z2$CsnO)e>Q%SB)UMaeb$#Kcj`*5J{x=E?5B`fKokxwB3$T32uO2C3 zIwLM5?u#fr5JEOIwCiN@|3vgcY)jytWF^eT5b{u9!jtrr3w5PfnVFN8TlfxHO7_&s zR=ajguQK`eD*jIdNf#U@3)}6oyQ|D1_v9pXy#N{V%XuoUB* z-z`}NQClrxM@K)=MXW#2i86Ap!Czq4Q3+-@zqnx_w@ZNX_Al$SU`UF`AvpT=ulG;( z4m|nvsiU{IDCv*dlqNdhD2G)f)`xY#^SUo-k1xHi&o_Iw*=L}_2FXb$mn6CmVI5ta z({#sI$ED{TIz9Bs0!XckgkoGB;yB&ni}?_LRgNy{?r=)_hcF^LEok+%z^Gd9L44p2 zmC-OV`8nX*p;$I9p~0D{9vOPPY%8-Q2}=CEmA{We8CIMe4ySAI$MF?S%_qE!2hk3GOILC zy=$K!2`)-1It}DMD!@C0^FAsyG4-YT7rqg$tx;|N;X*@l=ab75eU-jmX-$WbXVl8? 
zRtQKftVF33s>5>c|0JnI+`9+4b5e2>N3_j`U!k><9krnk9!c$ zD@vxQwuB)7QAc~WH~XVhWs5b;@X@~pcTY*2#-MP9fp>%kN`wn6oR@9v8PFGn<4CIP zE+YH61YMLTI}M$DwUH#RK5z&*ShO0Z+wk&~UEcnfZ}#qfQb&-U-DNR5OTjzKYD%=K z1yBS=43n@kCkoA17qqXUlTHm9JFiJDlL%c^{q~-Ho}#mnVPG?yO;a4MzKuXJ8ahn6 zDkmCPa-~rSJA=jngD>1Bb_P2oMV>aWGa+Z`g6JWp)Z$h&Y9ia77cHbfoDcfAvvBag ztTNP0%JMvM?9Il@8zFjim!-?ok4otPPf&kbLeK96HQ*|1;3UVH9IomnS?CVgl*GIM zj_z^yX0qR5tq~{8DgV3kD{2j5{9Vl#9M1fP=+dMvD6eNUc%44iEre|Q9s=DVPRdq0 zv^?Nhs@jVy69kizpsB)69FM|JL@-V6W)iXK(cX@-x;vdNqs(pco6`SLQ66Df=Vz44u;5O(tVIk@h@wg44UW;prN zOVFDjv+2LtN2^%o@48MyQCg*^mCDKsm@&_y&|5uf84(o%RG;ij`cj@P&=h1W@vmw( z$NHbd1EK_SC4UswLep7rgPelAM?AKRpLg%BTGuP6D3H)8R;fC%f%ZFRagjLwdjN`lZ@506N-AL%4 z+?ft0X-irEN#>O7x!k?Xe!Q!r82|1C_KA^qRIlmg@OoK-Y476HX=0iZTvydbh+WZW zS5gZ6e0Ng}Py@i(o+V_j#C>arPD5urFgGa3QsDwqu!(`#bn~MV8NOfl0X@x<3n9C= z@^^ZoLDH|Er~2uiQ?zg2J4#*%Zhhv42&8_J_kkn=AnX-*O%@(~D@ksW70By(rF9+A?U6^VzkiDq zeX)R9Hd+r(36vyMksn>`rC&Z_3obJ|`MY*qXjg6sfUoV3#~OZH%dwQsF_Zd~REXVu zhyQ;+?tMqQ8vBMohqbaB>c5VE-dPaRizt496wsS+hY;eqdF`L(tjkn%3MF0AS zLL+9iQkrDBt9wcOIUeb_dDWSsEuyVu7RYweDAi4g}RG=Iq4+_s(<>r zo{I_d&ER=DeasToB(8PQ(_T2O}yqq{tQ;o%$)$p=HBY@-4G2ysuNU z>8-(_@Bh9*0GyW^1B@y;)4Za9EawEPJUdRthy2Gtoi8CT$PzqM@xQ}r8&pVyFti@9(|Y;> zJoWiwK)`LUG|v(r3DK#>dB`|n$qi^(#yv)R z4YdSPjZ0G3hN2JVJss;t=sewiz02*Z9)Hv;*G9@N8BM=Q$Z0>s_vlQQmmZQ6v&I72 zNog+K?~dqtkx(85wXbTUK&sQY;O*KWs{G@aJPh^2xnO_hcxvYw%${9$=WyJmo-(}~ z$n%%Ny^7Qa&tpeUx9;RguT-Om z1k6MhCq9AN<0tZ?%@#K6I*vAl-J3x+h^R~_=R&*P>QtK?&+bWJSEd;;w`cE;?F?`CG0xQWy@oS>pCm=k$wc!SPpehczN2?4|6Oloij9Iw=W zgc$M1>l*dyz-%D^xT!dGpRD4lBQcyNxR-&*Lyqez7q@=qEfzum0nJzPtFAtQcd=%o z@DB_2E^QrZ&odJ$I-4JoPHE4s5fQm7?imJ(WyY`>pLKHZBs#sy%PPVyf$_D>h1y zo$W|!&ULDvtEg6FTBU=N%a*!)hH1XIA{V3g5NnR8@lHAaPFk1(bd>>zh$( z_}|GZl6t=H)|uYXtMRAlg#1X72Q88 ze>cO3hqgKrq-(J#GqcTMHF~dn9@1e#pFSEyv8uF0;(O}VEu#oT^;YWZ-07-jdI}mp zmPz!PS-0?d$}+RsD5*6*Z}p4FIHOKJW+yy0 zSyxp{y`5;N1{t`h;Sw`DfnC-$8`6QMsj9wJwYa@BgtVt9WK56KUK8auLmdEM+Jb~* z2C*xmvkIqK_YH&7mSO0?HmbTR%(!qg7Bml4^IvWf)lg-Y=9x4BBFm3j0BXB$s9M{%ff^&ZF={thl 
zE?&j1vP8e0+=CftFw4hb+Th7T$Ew~F(47qAtA?SZ(+33&juKrt%sCZ-|J&{|Uhhb$HLch^b8J ztb5-Bj`)SbBQ5>XRN%c~Y7jFw(pngw>^# zbfe_1#vaQm1x##K%(-Mk!n8Y=x|qr(5BnF31E$0-LQ)s{7=M9D2_HjzK%F7xs&O3> zlsA*&alzy3Qm3WHLI6T58*CVEMwL7L!if9)+Q-kKI?7ITws0+NddD&>0SYA0(n1dV z0fP0VaQQ#mz#V+7Z&FXVFDQ`RqJ;7MrVnsnqdw=6ee@REw8)h$C8VtD3A?14%p^Cl zg^G#goy&-Aa>S-(|Eyd&fL_iPas7ZiejVO|qI-$L8JKM~Iae?q);&itQ&%;jP5=Vt z%L6WejW&bWWjK|S_!4@sSxLFt58AmT-55Thh3qcJ8%!Ja7 z6ahGx^{zNgd^HRj(Wy5U&F-ENLQ1TShPD*qM^{#+)@B0Qo*Owc@AJca0kdwjCv6U( zs&T2>FoDCrxKdgeBKh27ERy;))V-nTAeaW7(k#Ec$-!;B)ItXKhY#w|j{_u##J`zg z1#9MAE4t&87pcMHEcmK{i*yVbdI0Vfjpv4O8 zCzd?dL)V<3l$x%4(V=N?2rzMQ`MAxi^3Jo8S@(tmH1l-L62sgjjA*vqH8+*QU8up` z87>GjOkg{M{&NMG%QI_HusK{Q_2~h!{vu_fg}LaL-C?MMto7*ufcY{33|bjH`xC3< z0SQ%&@EKN%GVAu~h=YRl%Uf9`*GcvRcM-%B^xNRHPjhQl{cTp=C56}H@SbwOZ#`Ql z;8)7~UK&Prp zcP&?`PzmS01-;n$6~NEEKCDt4F4Y?Ibjjl!)1FZXg#avJpr#LH)mJ$c;$~QR>mO7x zwE%Ne5s+LAjsqFN>8;#a;Es6f+09-guFchwD?eO|%%k%TAEl0QZJBxp$^;<;r6yTE zw$4bm8eB0v4KaDq`!K2OG+U0Kbk{gFR+6YIRpsbilWU#X!$?+@ix4rSo(GPH#y` zp@9~Em~e5Oh9ba`)2gOt24E7O+@;P$*M)a7lZ}AP)w!lejn^7Cv>ORH=e_k@AXjw@ z{jGh?QarN^Q1BUrL)Uy_FH5~Oj0+=f=e@lC{qV$3+q%YjZWHQ3FR!G?=&Bawd3?&w*t!K z+Is*W0%hQLACwEvV`*IK9eZi?xWW{qWRHKH)w1G&hT=Z1XSWn(MFCFG&f8Deq(oaM z$G*b_Sm?5lzKV--rq2KzH>CFii6YDGk%u0R!vcrLnrus>K;6dJUw8xoMz_jw+i=q z->27%FOPW`E?Dm21PGkVvozb1f_?CMK%A0Piu-x6j(Z$zqY?+g3eN&iql+-tj-m!< z7iAQ?Xd!Qbv{fVMdP8qd2#%RWuBRJwpEYgZ11tfuk=t0cTt-S^#|H1o>j&>Hhzi>0PBN+ zv1x39D>_MBqH0Jok+Q;^M4Ru zRf@=aLwql$Ge(@Ey@?yk{&03X52JmDI;$>AlI%V1WFnd;OWLGwr}2gb zheu0J_Top@V=km~M&)0OjcJGjE`3pTJ;U*Q-*i=(f!=^xW7ZFmaU3>(;~_T~2GCE^ z7u-tuk1^s(Z$!;r;V!K9e?)e!Yh#Ik(pKyXsfQn(sP9#oypFq0xSp3U7ocuBJ_7=C z`#SwkNfi+X544X|e?aYgr zPuTczq=N*Y2hg9bpr#+jf6dE3koe9Y&Nd_gbl&1)!F9F*(aj3i3(fu#WQYNkPsHqGzT z4T@r`$1{bO_lCo?FnlF{BnMRfZF=$4tI{YPASiuUT)+O_W@ZDQv`X)E`;m$aq7uDX zrA@jyk+%!EAntSnN7C>p*7+UZK2Z3_Gq%np6wZE_HB zFo6k1354t-S^giv8xYUg<63w`hV~noJ-i1+y?%=w%HW&tY3~bS7FP*vvBbv%{){w? 
zZb6KzidY5T5~n{2N5bW21=>D~A8zZJR7}<3v+D)R8l#Oeesuoso)ef+`kg;h9=XiI<(|Pyt^ZAeg`Xqa-_J z$Z}U6xY`t3_;%Lv&NR#X#x}fAMjm3Ncx2j_%%XR}S9hO0%>UL%cugoxg&8`XBQs@0 zS2txo?N8%A+`wdJ_an0gdKumqVF`R6xmKRVJKwKo}WRKFzQ6y*gQ!ZV0M`yFp9g2o8h;%Jn<9#y(0J8Jzne&8){rX zVXO#m7%RNNQ$h|2^IPh+#duPx|2)mCgQIu-VJlh5Z}aka=hU-E<^w8&$w?)hF9~Fg z;T(70@`@LX7>VuRk~jTE=DCUI9cVuPnmgCV$x|q@Yj05DLkCAAyPt?Zvb=V|Df^K2 zbn6!~S$%v46C|(e6cBfQk^k&T-h5=EFJ##4-Syt9YDeEC@VOn4aS$FfeJ2^AB*;o# zVI9xex3mLL&V7nsY>hhrBYw{>ozUvMy+_Ap)>d;-Luxgwa6fyz%2!~Vg81qFeJr{6 zD94=8jy>NOCS)G#2o{SsLJkFd7d@-15iye7n0BSFH}Cv7ICm$l6t7m>wy9dYYjW+M znBod8&hz7eVULkbe?ZdF-n5irne&F8tjh~@PCoqn@!?_?y6>7rOl$RzH{XB$${?z% zf@6l23r@5h5kCoD`$Fl`YwezO86uR;E@t-qFctVy5!n9%gc>^%;gNxiwKTWJC3ekP z4}H1(V6iFJa#hJ<(v07ZQh?r#M0-mfL z-B{H52NK$DBBx|<-dnD69*6tX1f~oMs8ICd7iuJ>>jJ^CCPPNKqWdX3Zte$Yor?(5 zE%`m#x*1ASwai@|%tZ^ne~}ntI~)-R;8y_I4lA{fzL*>`PHXdcV>Y+Vd2;OilF(I^ zGTuP0aYI=qf%GKmybm10o+?*6Z*cedZ_wWI_RWnovi6d|F`42*cZTzB zcW+!}^Yd6F&TcIb`O@oLD6vvyr7BWY;k zH#k&8r!w`?LcXdX=Ju;PtqJHL{}0469hsXPi_KNFbYbL#7Os02v6B(EFb7r2hh+p@ zYuK|0(^4lYvg+kSYhgfiGWNzQ%+~4ur99Xco!B?#n0}-!6;XqgKK6!lUpnO z&Qv2!nl8q=p`(MW<4-X%227P-w)~w2hUneklspq;F_LYx&j;{_bh&w1JZ`!*w4=Gm z>Pr9d6clFH=_&s(|81Enkvy_@s(CZ8W&wX{qk*8aM|<+`4I^rk$`qhn17bC2zE#@C zBI|Y{*8VxoU8=(aRC2(%W?OjtVSv{TyZVIJ3JrZIM!91xNL;ig;`>5O zK>EtrJvX(iB#U$35@QZD-Gvb{n+NK67S% zN1dl7^r7>T4{kPx;v3g;{NwCm-7N7{uUXDb8qC)ypVukZFBHHx1P{Z=9$42JtA~hJ z^<(Gis;27F++^F!ME|E!Kx@FJKz~cJJKJ;2Qrn*3?-en91#-gf9m_i|2~VEdceOCz zKLzjv4K9wW-lFb*$SEEA6S~luzXbc`Lhz&ks&U}B%-2(ST{xvz=Q)cj6@QBk#Ou4aPBV$Y_SJj}&udK_hLv2kr41sMiLBYnaX%H<=B$+Ey! 
ztcRJW+2gh~VbFvs)k0V4X8F0(|_rTm^AOVTjRkbKn=2#uK1Pbyin}o~X z$6(XT3I(r;Fh_QBT8`zz=G+3yY)gFpvyG2bT@-+-MTBNC-=DG+od(G1n9C2(!i_~W zi(g#Boak(~r^?e`Cn{Wdex1E zv0(-4$rVy(BmDnb49MEiG+x;B>U(x)SAA)T`s-ZN?lt>q;L5}{R$SXiDC;aOk-nAC z(`-0=tK@AMt=}No6Y&hkFQ6l)DMGl3Uo>+qpjAL7Plx6^p3`I%N$@sG1JxhE$=bN_3sl z!o?x)v*Uo?f&~;yge}fIuwojP_{Pq%wgB&rR7K9FZEkSJ`orG~{g;`&avy&dw%RPX zMJJ^w`q9(&zf|Vz>9oQiJC)sK5o;KaU2u|2=bb{${J$&qaWIez`=Sgc-wk{vJqq?M zfi(%J?=7jgPK5pqQ$U^^$V|*8!J9kV{}(%<_QO}ZdRSV#Wj+Po5zsAZCS(;i8fpgw zl8aw~cy$ZkwNa~;6z+YQEL4!Cnt2+I%1$`@aCc$-xB`v2zBN<+AL38&jz9spD$~Ic zS#*sM_-f1!R|l{DzHB#~H*+L7zA6{$WHvnc#j01Q`1RWT*GjKYJSs>~`@iKgcD9(^ zTp#pVMFAgv>BIOGO`dLv#)>*Yf@>_#-y~5azA?ynH zZp8EJo^7dWCRtp)N)|npyy6+p33L%mn|q`4oeOLD7Y}c%O=m3t>8^K;PV~Q`oI;G^ z&XNMMzV>&(5mm|x^9KlZsV#3kqY@)X+w87x8ZjaQKp@EB9S_59a4|=CI0xst3b=p| z)y6JxjUh@Z(}b=w2JZQ9%htP}8(D52u>_rVfZncCvq&)mDoF1d2D$|>4c0}DJmzbS z5Y0NEUiZm~HcVFrY#%LZa6|clT*J)LY2%|?=ax+vt}eZR7Sn($^5--+fz99M9=@E(Bx z@@BMW_e(dCfKU-+9r?|_B1w}wPQkDVG6*CK0AiC@EwmZ5EkF!1oNqiDTzS_%qNh9) zOfa}D6%eqkgYUl08Hxe~lD;=cK_C>r1ufvL|)_?#}J3Kto z@W9`>>SBEE{$P{Fl?utoFk<0c-pdG&6h>9kLhkIs`nhly%)&?2jY)kY6!okW5oiJr z2AOZIhk8IpjoSWCWZxRLO;TSBjR6}awq|?fi93^saxCLkV=AuwBugnOtJtinFYY@I zy4t(srpgTUx5#R#QaAOC6HS+M!JJn3|9s_IL=;yh4QN>$fR}2tKvnR#rVy8PC&D;U z%SX7X>CL6G4KZ4LRJ2(OjiaHq2f8}vCRvUT2u5$TtmMw>1|6m*=XyufzC;NlQ1TF} zQcFYt#9(Pijor!bhdN;_Z2!DRfy)F_868a5wqS2`RslyKe_gLQK}**snXFE%b1Kwn zKeOyDztyD@d{l#az--7%wotSEEfBne`@V*2Lfd5rw}Ay5upr#-Y7Lk^+kKf9O*XN6 z5y&-b07~dwL}u}8d+2*$&PBNsO~^(1q=#W%Rty0+Q|80L{flqQ|JrD}KaE}cMo;GG z$x4oS^aafjv4%oGiRfg+L>%*ICjf4it=gszP@o#;;GItWf4|_Xp2fx1mgg#+3WNWt zDtsxXN`MAD_MWAhbSr=mf=l3Iq1JVlv?H`GxSy(tz1z%fTCh4Z8IeSq3PgSm-ON1U ziel&YPEI-{ap`sJ^eB{YnfkpVGy;_y{>i>bwix5*-I1tRE@nNL3P?Df7qq7-YxO#Z z!fWz$0AGWv@hvgOBX@$2@6~i&4*e}NncNUua7Jmqo_SNIN)#(w%dNq;O;F@bQHUvz zhudxp^RIZ0B7b;8e7z+uMX@dnUZL(5M(*$v`xL#F$ah_Oe;mIB&4`P-L=+ft7eoSu zza@1|ikirh_S{j&U0|>uz_}(Tf4osx7TqFJ`}^qI)Lotngg8WH5z`a>ZUCIOq}kE# 
zwp*-pmG`+KKwy6dfUq9^V==nuQrxY_TMS;^l6}mx^TOw(~m`d;Fuek1z72N;7bdwjEitx+-SoP)e@pe+o@ubtZQJRP$W(tdtwg%B9r@{e-)L z;jd05@F<>x2=>Z2&F<ls7Ohxgpq9f!BL z&mi!$xfAD zlw)y$cJ8|F--9i$rk__vLwmaI6rpXM1$_}^9YD{8+J{PX}fDNnpWR6bmAtj`?(4+{iAot zHi`QV1=QycPSj|Yxw(30;??6+#ucVo1Fl>T&?na1_Q@zMwWo+dC~hi~6Ghve5ODFM z%F(Jgz)$LwwKnv6=81e>%Wm@%3?6fdC~U+Bp$A8r3eP)qy(Us&h6WPgEuNrfudmvIf%v(EB+C(u#%hox#l<6SOK%4>crdXF~{?p zcKr7K9^i@>D6?TeHCW^rulb~t7h}MM9n`8BggD)BM_6+{L-*mh>wW$KW?zfl;(4Qz?Ne%oE5ZhL# zEw{;5Hi-pXsgyXOW9D*E3C8XkbK(b5+!oU(LHSN;f}#&FS3wggbfC{-ixySiAE(!T z+7Aqi&nFcH@7~^WdDy~u`H@I_s{xxmnv=R?liQF)arpCW{FkY)i!bHFC%Fx}^#~wZ z$2Z_@Mdg*1-T3iHOE`~zLdA`}GIzpcrT=!uZ{?QpFElW&lTJ!oM^UO1AD3Gh`w4x; z$$Th-e%4?=Hfpe^Bo6_%K4ak46sR=qq}sXScLi%fXt1TsEVFp=K6#=TrMz*A5j89;egKQriqZ9O-3%UQN4? zaT}Ka96FpRm_AZe-l5z(&B(goC_M-y(}0kIPqMdWolOQna4EF zS7z`;L)I=XgyX}k_)HzHf*XV(5U#K>RVe1mb0gBr9s5i}#WnVhvzQVeRTT!k92Bww z&!_@(h+&-Aq3MAyH*9q404Z}A@4e}3UUGs5bBl?;TSN*0D&#l4Dm@Zb#Z|VQ%gUEx zzQv^pJxN&jM`8v(IFOEw4W{quy-N^O0h6A1hQ+UG0(MflV&;|3!M3|ZrG zx&?(>+x^+FptsP4k~lYi-Jw}r=varhrO0Vrw z#`1hWJ`Arc^eAk2hX;ID$9$ehl;RZZoW*uv7`TuBgv1?Z!CzaqA~K@On5_5a1*t6t z_Wy+hCHK?>KQcp`%XR%|YUW{qxtx&})$9W&TiUh*jv`XRw;ZPD!V65G#^IU#g>mZ( z7PJ!|uxo~;W0N$46ZpB{^^UM#KJb&il(l`7*{Eopa1ST^y{`)rSp=AvI2qFMVT)K_ z8&^55BFJMzelgGY^^T0VY}(UYGvA&vTH}6Y?3rWzUf9hGHWg9nA0NQ0viBEPcvf2A z3!mFC6^kpFSPPr3kf9v7a?j-Mw?!K;W9-cI<}*$8yx6|*5uLyDtqh7Ku#HQW#^-1# zXNlgcly8+!4!RjrAe3Oxz5QNeRXkjitJ=&&Xg;?G|9zu)Aku1kWN2mlpA8xE8Tc(B zU47Yj`7pbQo47*Oljm<>)Hvh2Tex~gR=MX&x2`VTRN-z?7=L2?>z&(*FsH3S&|wF! 
z{ju5dyuzCGmD6itt&%E)={}1}=P!rN-{1b*h|-V0jHCQR>sr9Q@l`;foRN0+O#r3u|P ziFA(Im#SnQ_z!x9hhK2uo{Arai8sZ|-o{ia%~#A?FJbLhErZ^{W9&-ETSi`Ww((~f zn^ig>+z$|fs(0S!Zc?^iN_9aQ%fE-rBeqQ1(c2THGK}$`Ho@run!gg&kt$e9_u6?f ziI5u4dg&JR@3i3~v}Z%P4^J9273?ewi?IZ(HP?Fe{M>2USEex`>$~;D9aK4euKuLl zW6DYxuqwi53rI{yI@uLyw{w&@rgEs439W5j(_u>}Ty*sG5sbv;=&Dbj7nDE`9;?k{ zc{!vXty)~M(=hNDKc%rYSm@wT;MZGD)jFXno!+g|h$tpI6ZLvAXsh zc~$$U$v^owo%*7Yu>vgWE;AjU=D;kN^yNff14Tk7aEHWo|8{TzFBz(%bvKnwAYXW{ zAXZ>iTy;$Bj!xT$J6#@EvKbfo8XL7^9WcW?li}(i!+tR->8*>G?A_mAYqcJVjK!^} zUMQ|lIrPOL2WJ?IbBe3hb#!#~8E)IqQfo|GgdlU4N+I(8UDE2`1+~bv7Xy6Y*_dZL zg&D7H^Mp;fgzoE4PN}O_Qo2;3#Mn~rbDrNN4tO;?$yQv-e#EWw&Nl#icj#-G4B z3@$g+G9Y%oZ!k?XH5v(rcWi8&49LN6%Jk|Z2mNzHH{B*N}~LknW4gN4sxh?w~Mk$Xyna@3}n6OlW*@m{n7( z&}Varxk!YOZ>_FcTB=A{8xh>MI@xR+Zhk>DH`DYCeUNICPw4Fd?apfgg;UB6d8XUz z0~+<$xOnRFgSknIJW`D=A{ljeZn~FcH*>cda{tpyKdP!@Z|N1$6st=ALv{Slv*Wm{ z4dH^RWkHKsnKNt?LK(!D{Rf)877WWqEjh$gjf7HNyxQ$dHoS=L4yIUCmp}$qQNax; zfQ$dihY{TH8W^J*d$vN!>RP)vp5^t02l@BZ95i{TA$Q}BtaxiNK6<@xMAqQ)_>~h< zo&c~)v6)#IDByoAzT7Sx{?X1o%vJkDLeJ;9Ec~hyW{VAHK3w_Y6ptCFMyMB)Ezw6nC%$S-hxBc)N0E>H#Mf1wmrre>6m9 zma|T*9Z>!E0!g3lJ-U?P6|(y60{RG}jiS5Ncf0jzTx#YIX|;Gssr2$0(0-YaxG30o zjeo`SL;?qWTav(V!)l0o-iS(DnM=9a2kFSf0ld!FRN_*1it?dF3!m7FUQb7xKlfdc zs%`?{No3-uMea7W0(dFq;yGgo2#Qg5Jh02*UR^Dgk?)`;Aw1EKFZ@04#b1s(@At7K z81dXj&n05)7eO(YlE!9A`my0i19=&NVMlxWBjr4y+U1b~Z4LtcuVSNJbtDAW6BJaY}D zF9`ZPKI7Z1+-8-CG&*oOOy`@2c>39gqmyK^Z*Gw0`Ondhl z4=~M=!!kpHOU_$Nft=?$-Nt-WHTEA*fPPj-D6x%v4lH-jVIQgT@eDjQ4GexyQ#h~I z*nRGlwj?AqRFTu*X{^uCkP)$J#%TXWrQxa=%cLDKSCu7t$S@jd=4UcLv20eR)_dtE zJESiX7He=Q^j8dTe&k5yvi_9jihgE_JJSg8|GVz5e<1XSXrMO-)$}NBcx(`k#HQa~ zi{Uoe^(F+vRM1?Ac+$&63s-JvGw-qM`-dRfWJpOf^0HVL8Uc0Q-%q{cX6(Q@V+tAkxVxKv) zQu5O?)C-9=GO#H5X9XT=^SU`0z{LAqkjzBi7wT&gv_bYN#8n)V;L5u+#p$>8li3}v z*aPOZaIDeOyfc8DQC+2qsgYHYw>MZR%b9m!5%TtV19I9`ghNo~m8RBpO>$aX7I)sz;dVpiCaHoHoEPws78wNkObp^-^==6%JT<-1vL7-q#pr z$9AQvr|Jz$4eIq;N+iZ8ma*f6^$>$kjP-Q?g2%UNiuqC#&zva{aI|PDsj|wHJqoWl 
zSINqKOgMg3Pu`>_d`q`IT`Lu6*>4sKK%j`d<_n`j@`U#5qxK~_`HHW7vJq>ohUk=dw zgH-gSzGuh=nov4CULxFk*A3|>HqHl*e>Dqy+3W|e2I3J6f)w(H#CpsC&eHpO{*VSo zlw%~zQ`{kJkhUq#K?}%y*Vf>UpS|AuIO*1+FfhCHvyF`meRep_su&3ym%dGGC9Q9l zz*ggEQxF98KAU{xeL-y>9K&`?xWV>Vxi$IJ5^Sr{!jYc)#sJ2}|M+B6URSo_RHOGp zjl`cBl@)R%y}TPVuj=F7PDif#yDl0IZi9!~XsZ2=AXAzXz&HXH%T`Dr{jSATRLXfq zy(eyRSHxt$vwP~|;-#?buJ|2cP43jGdGYMXH`_~5>JNiAr|7dy zGI8=E*sO9;cd98SCY)Z$fJNnHH}kGE|5<&KtTLME#j2wc=_jzk{AUKsT&)B3d3rAd zF-l~2uin|*q0-2A4^|cY<$Rd3B_Ng$BRikQJgzk5X}K1#Z|+K&I3v5_#o0UOO)EJA zVuCO(gL--Q5=nMK+bNTc`lfjA04uKjHszY8=wOFbV<*)v9XlV6lB%j}7oRm$)_H!^ zMu|+#&U7a6p9#c|iRFkUSS90EtH9!`!Qx;4^J|BsZ7-7)7RpN8W=-fU%x<=;grE!+ zu)~jgew;dpcfKN;b0}|Rz*)1S8-zkt@;8EnvA3>{`^vgYJoY67D37X^$GV0ln4BS` z#V-n^3YMoZ;WZkvhHM+PgxhzJgeKHzJ^ei2&_M59RLUD*ic>V!LlmjVd6G@wpaw6Wc)I%19MOic z>5XVhsk;$6b#|ZonU&L4d$lCN?6W}O$38}f-NTmB?)>ddX_)nvb5yvumgXSYee|+X zS(jJmy#0GODJpSUg^r8G>^RClfKqk9&8sTYG3_9VaS)YPi1g;^t>&$ zy~iVDfOh7b+)5W?qhC=oHWVV*AKJH=Y2Hw zxD)Wric^J`7=JcgT#P)g;ik!GugxBemv7~G7g~Ax=&2+|EmzSG;bG({@&G)_H}K`S zYf0GKe(sbU{u_B>0(9b2Ss+#juuUlXaX_YXSJ^Uk^}Wh+M4 zvffb*&d{+|wM8lSttE`LXiCgY(oYrmqU^eMHtL^NV4l#lW(RBM6bJp2F7ETa^sSw$ zM)d1InLPj5cu_;~L6CQ)iP@SFPoB^m{)(E=&?%lMf^7YC@Sx+IEvC=f-r9X7S-7aI zs$QJ4_RZb%A$=C_6khKew@r2WzF8Cm`xGTCgbf{CE6(KBMdveqcj^3#HpibG7o4hp zzAR~JvtTM4pnQwa7{n_To3c{>e0^O-0xqGRT}!mnRMT@4iBJ2i_bL&~o?=_ds{R+- ztDtOKKb-q1a;x2Sd|<>Z)HK@^yYjg%qRiY>*Sf{BR1-Q&+{INWqbbYd+{D5V zQhZyvT6sKm6Qs%umYg*4^~GVJ z&R{oZkhSN*+MxvAM%n7*Y8ekO?0ObadnkFjJwYAWs+J&+RKD0>8op{=JCfIk=E_lv zlME#Z2R05aG%Xzuc9))Yt(8T{EZu8;bvVyHiL~UA(cj>LUYVY&c;=dfDFXdFGszVp zv7AlJIPNGN;o=vz-8xp5@1s1wO?3_T6zl4C2d{|Bg$Wfucj1h6E8|Sn@l(sJNbw4^ zz0}v0=5V%T#=Tf*N|frQGqn}pYvD`ukep8x24=8Rm=@txD+ zS6WX!+nMn9ZPXz~GVO!G2Jk#}^1rAIblp$RUr5B*Cd3H|bly>D%bvtpN41~6hCDHy zQ(yUpiRIS(gA1Ig#=`hgR^mXJ^N?|+xUv*icyYVSpqhHxZJ9PP9Cr#YwQ`2%(tO88 z@yTXPdDk1y!Jj1+oVPNAc930}u`WclH3On|Q12E9$P zQwXl&xF}ygo=fPhIA7!N2;74~2O6^SFUX=7}}Jf(V!}Ki0ohZ^qsL zL4wx-2d`#7c3lK@p~@PFI6=%IGvK}PpAix8x#K>dVWNGmRRLA)+kB{WVgZK 
z)(n>}&_ph(p<;}~iVHI*w|tsHp5&!r5?Bg`^iA=5$R5ai|7UJ%-P@*KuG)IKQYMA@ zjLU^Aw{V-0CI&2pnd@aej%KCpCX{p>Y18;4(cJrs-&0Kjd--w`?Ase}?dxz{Qgl zUY%K>TuiDqtX{a26>ZxGGh_@AY{*M2ZE(oJud7*RY}2^mKMEXMsa*m3k2FP9ae=j` z4OdzmgJYdXV9sL(JZrqW#5AfXaW3C$#nY-|aiD;gY|ST-yflVI5Ly*Y>SZuB#a4aU zCj4izf*i6k@54vjrlzs28KOZk{q4Y)DMX~eEB;0e!)|FK9+P&I!D;CY1y!J1A`&su z9t--w$mk+V)ORUss{N#GR{cgIVt)XInVY2bc1Hb1{sI}65SO*UGSHhQtbC`1=Ar%@ z-aLVeqN^kPPn*Ou_dV3yGg;`dJo@2$gOuykJ$K-jbxSZSH-t|17b$XjJ$&0;$!B73 zhu<8JvCCH0C@D8ZdwrcR9n02~x0eseGTa<{l?~3${BpK;Bv@kf9Z9?bA7!K|W}LIO zRrN>TKqLj1wL^gq>n0k}snws-!cNoIATtMzd1xs!{yE+FQ*w7bC%=V?9pDMrx9(yj zBzU~!QrI~sFcQ1lv$M=~TG)UV#Ay006`Gh87b#>mD%crvb-87%Zg+?|bpcg~qU;do z;(eBskvas*CKjO(9q4eW?@^EYpGBBUDLI;IXnc`Umy~fHzwMojI{#BTEXQsQ(|elU z(@mtfei)!9afyy_QUW+vnA#N%{f8D*0ed@>ti~)|^U=rk)3#aL%eP{Iuv0>APp4(d zV{mxo&OcA6@qRmePo#1gxqP?tJ}bQg-WmYAS3$^P!kr)<@Oevi_8R7Bbfo$8ryvIh zngB@qY}f}*r3Mj7VL;`mYPh!1F|80w!Cvy_Cmaz0GrM>Sgqg~n8xd=6;N+RlfBkvb z3=;@NZW^#`JUhr~bLmr-$B27hvJId7^Bxy?A0Hj@7;@l`$q_Cta*3n4CZu_ql6MeP z9>QP7OBP#|S#rd4=A7r8vJDc~d+cFw3uuHy8{3IUM z--q9Pp#9ZLZ_UiF(IU3LzyDqT;!uf5mF2sL;fCUP^}E{VuwX!s7T2HWcJKlhEI4hH zo!+`EjYryH|MWpF-^&U~k@VdXBW_)FpUa*iL3?OA+K^kGQ{GpS5bOc!5>jpW)D7*8 z43HkiEI-&BOV25cfVIiMh{Vfg;z*!)-vkZf_(bzQ3zaTkVlrq%PFotTanC<>GE&B5 zY;E7q4tH*vaA-$WM(N#+w=w}V1>02GYxQAZaY|eR2v8&x{;cx)n37|PUR#`0cw$hZ zzI_dTv_xBN}=@XN}WG>K&o1*ts>12+Va{yPL@iFY`H|)Cl zC{V?bQoRw`V{JLa-)Ge2)-!0_2=<#|2)lBpf~d#}2VW0xW(SSYR*5c^sqEHTR~9;x zKt#$spJgjme?>r$E_!(+L}v&2+C6McjInF5F#guini&swG(B#@80P1hm6<5-PiQN zDw@|VN;#6|flC4pT;Hrre=2XGqE{Q~(d`yl8`Tp~k`d&vxIWtj!k+5A_^mrg5fz7= z#{6ndtiKGRwdGRn?%YI*(pnc6L0^f!@ftgm`&I>O26ht`HrN<)> z-OPb_RXr=I}m?+B2ElJhHKVG*gylbK90NwMP3;K-IKJnh2iY)&WczpgOj5>16C z0RH*F!k}HtcQkD=B|gO)+2A40f7-KRaJ1Kg4GrAty^VIFEs{XvcvmJ(vUs_MX0k8G zKB^wf?l(3p8hhpK*NkaNviGFyZm%qERJ2ZXEGATu(W5Ho6hSWJ!MHn1EFHW+K7}i; zCiUg%l83>EpghY9S9XJY2+}_T+_@ZJ!OOcPgf=cad}4fgzKjSCN>M@9o)6bod2mt-7fgvm*8#s*$mYOtjq4tb&)jiovLX$9YPRR@(pS>`X!3MK%L{F z^dNRJP+NM`UpbVYW8&KgV$%a!44=!b?KmnQ2``YO>~b}TmA>thno!q&ecOy&izU56 
z4J{pnpP_#~EVe`)&gfi6{EJuWUvc1-;F9+(g^yJ^uo5rlgSl$mc4XWcE;*S4?JHb$ zuqg(vspxCF%`p_p9fbq$WeS5TO7|oZ4V?J;L-SB?_^+koqpGr5gDwKqhOXuGlj=r77 zfCOe|YEIxQ?4MIVT>?=cMjZUO`)n*I=NwbG;ufCrc`Rk+gRIw;DnlJWmZ6B_zQEFc zxeR6lsZmb{Qe0$t)#8+L9CdZILuN(B*t#NPBMMD?wtl`ci=nDxkUjMeP4<%PrJfjQ`ttOm`+ zgye@mk`&P?)Zz=p?}t|=8V`1v1(&ygy!<_RktPGVVM8BkkE>ORZAj zv8V!+OpSNgf7UOvRBCBYn1YG=T47K{)0veMwX?#ksz<1ZVu(e9h5Fz9cf<4!zQ1aX z`R#o5SLWq?dJLloE${zJB==7%J<6_N;#K>s_akxF+pz{Wap(TS4u8`jI6i#I>9df> z+^Q$V0gGIx4ZK2dul{?#)2?eN+o>M8Ze#ibJ~G}u0OxNU=A*(WtV8&ONy+G zm;hIq|4RqRF8f~T2kp~RrU}-J>sckEF^Qi*xv47#(^Meoxnvx82l#?RnqqFCj5^t- zkcKy=vBNqd_TSgkcLiNMCSZk6e>`J05eN%}p}}qm1c*}s+$`0q%IP&33TFn?k+CK) z+s@G3={prbgfc*VcXIVlm%#Q@cuC3+0p{?5NiirlH@=5O$H_9AemPPeB9 z_qyU z0k6_78eh5U^7`|Pi@`P3Tf}OBIcE$41$%k_AGv~#U?jLeE|(Bol9v~nNDOLk6BJfw zRPKjSnYp0_Ff^u;Tcb>yhyRtnZfK@#7D#;@l?Rr>zlmD{NDusT`3Y#hKN9vzQO7Ez z@8tV_W8gQ}nM*e}0}({T6wZItsh=W6^BT? z(j`g@)9%gSfAxI?{@hzANF6Cai&^G-0cM~3w<#r*HTl%;cm3tJ$5}JxBv1vIQSuUo zlx%jU!Hg6}abH9~oPG@yk?!l}-tyeW`aL`Wc`kRV_mfX&yUo*7o(92me1jB#uM<~>|d)Dj2^&oR0{=0wSkMF!Sv zNMC|TX=CEne-ttSejf}k|MOibZ%^Yg!*j$Jaljq)0Z|f3Xw3SqfT7U;>muPekGrDh zzynh7>{T*XY;=_<_^mYrbq!>YkWtG895cdE@RDM8cmx~I30-Y;{=KU7Qb+v$zl@#0HZo^dZ(0aRF1u;`jm{q*zFc7P#xPT_ zYEB2>`A*1rBmO=9J(0`HtK_pnv6Wx+V^2ZzH7~jxy##>!0ChFDv&osHn;6RKD*2sT z1%r3gPe2{kM0q34P7Am!Z-b^$Y8&0O~ig654YjBsLaEIUhTWP zTpp@VtM>*Y-eSRBNPXD&^N_yDZz2Gfj7EekJuImQh>~>zQXCGkpQP_piXswdH#<%* zwY!p6u=)s|I8G~0=?$Ko`a|*TA{xaz`}Y9WJaa@DBMCkHE`WO8zeTF`e|v|w9;KrI z6R#Q(@8L=-UNYnmNkAo$EE%iGB=srmQqKX z4@kWP8(%VNKAyIX?}a6ZG!tzSCZ|{?ERI1!3?Sm=Mf`laF<`B=56YNk8{4`QC{~)c zBMhb>&IJl)pQ;-pws~UUj<(}_YXkzx#tX|g7pN8pCBQSV$1eu}k}5lziV~x_vG?hr zb@b;%iu2Dy{_Z~||IwL{JO+OGI_o?6Vpk=6RQ9V*`)C65`xGlspY9aK)PuO@t`(0g%zOyZqt=WVTg)^_1cTcDPqSRpiyeL zf#lo;K&ghqaVY8)*m4??2mRecm#2px)(d>&B2K?VLAnNh?9-toBhFPI>Ey4xx~9-) zILKS(Rj&kV!gJ|qsO<7N%-ppcuhz4EKviLK{0JGa$S5z2Ss+*~u&mn%eGgb$W zCRClgwE|z_d@U_65WuRhUz1dCY42QHQyM%6?g>k7Po$oy)N3rU)yew^FSM?`kYs7b 
zVKK(B{e<+T)dI?trWegTj}AGwD;9g^!4aaw=DDLzPixN#i|7MBUYEU*TYW;jjcYU`OC@6F-nRYw02K z|OXlEA=9Nq6)E_KDi64;-Wn)bovU8Q3*LE)r2hAOaQJuB6?Z)QIvcDvp?>rK|nop zs1xM}fldzN^};uqLAe6p9f83Qt7I$#4A%`+9Bu0}+d7Oor=UXs1%4?oj4GbMJ2j0i zgjIDI@O8+k78WRldMr51)Z#b_XsZnP&|%oo`2pnd%);2sh2>S160oR6;%}0yG@^DI z!J1?l5L?d?`X=DO#OGV;)OGcY929vThydhII63{wIsVjmrDz+MExc2i&7z{K>`Naf zskDNEdx_U{{(i5Kt|rRD8v{U*TW>Cn08+Fgms|lU@SEVI;mSt z)qlLh?sR@7UU&tBN8gM{eBd{#Q~^_hL&ifJ4i%9wx1Cmzt_ua0W2)H7MF*$5p+Ix$ z-mNynZF=S&l5IWWuQRP|z7}ofv|WqXsRe!sl-nl1Jy#D!=x=9l*~~X4Ph{z$AHf#g1yjru+F87*A;krXHyu1yZm-&n&2M^kx58y&rlB z&V4B8xRcgrcb`BD9UbJIKEoO=;_-8+!;Dxhg*_KQfny?28RLsWu7Hhbu+VOS)$RJx zAV5n=z61^u;3(O!8k=MU5MZ!<9J~i#0~Yog#j#FFal!M50&QXebqO0Kplq^MrU?uD z)f9QiJk%l~PS2}pb6{Oj`+74|^{R{row-3|^ zk?soxv#HoER;ooH?5tgz+%E7%9L(U(c}RTU?#rV9%H^An5KupS~^ifG>z(+aBKgK(~{TS%<6GSplf^)D!pWEw?XWGu|Pv`-f z9Rnj?%tyJ#mOEFV%)k2+>ix2!mX~Kt{T6&db-Aj#73{eo>xkxYtS^Cll=glX1$%IT z*KeFxQ(FhS>4S|v4~Vpibg9lsLHYeS==ZT4w5@5G>5xmP8)HC&<9Nr=^^{8+uDOBZ zohvY`gskOFg=+F~o7Q;=@GMqMuC-q=eU&MsCNL4HTTh3&f}k|c0o9;{1T4yYF3Ip_ z?aSb3mCwQHAUEsAh9J$BL!^#Z^4cb)PaYiS0uxKU`zl7Sft^6~S?^TLKoN2%J!eNj zZ4IFl?dNv9%PfJio|*x`chBS4Ih`DgA3+aQ>V2pY-lTe5TQvs{h^{g94N@p$ z)a@S6>a`E+T);PRw%h>-gdqSnmzAmS7qk*c+HhVh8e_1hM!*L+r3U0!)SA-)FD}?` z2!zu~ggi9T{y3Jr=#=9}j7E7K-wUm69>)GU++&T{9=Ngdg~2Zu0D{!BDPtg6sE_=4 z*z8nY=SNQf{~xn37tiq-na6NZSGdN$AKe4y_-GTb)JyI3H>J^{^O_M7#8?~xVX}YUSb`;uY{$7DDmhU0cc*;DA(oQ9weIyE!-Rin% z+11yW2q^P~Ek@J3oDQ1jZo|PTz_}4n1WsZw0cz>Ms-p8A@R$fzuRt+hkBdCQzNk&|!^OyNb{v2V++xG`VosT`m^d4Nj^^uwJitR8=a=?U4MRd#>^Zosyio*KTq zLu?0f#-+obB#q}=(SH84{KnIHqcQ2y6Z82vVYc<+Gnx|2Y?N}xk} zg8Di=6h3DsvRh(wd8chzTM^Bvio8BXu2bK+O@JjO9c{(P%1r&iK>0`lv-_ebdt1iy z8aaK^^Lva&SEJ_O3dg5$pq^dVZ0jrQ0q0?QLpH zo2fbv6aMMkwMpl6^MdHawAP*#D zZ7hw*ymr;dlM;}4Rhlk|0YGaZs7bx99w+0OEe)Cv38^Sj>T^m0DLny0nHt-Pef~6p z`iqNlivZ&m$Cro83Z&uI3UET&F%lg_nyL$6>S>hU$h#q%o+slZI|i_L0@#C3fZr>` zt#`8`MK#;am9EgPHx^)C#q;n+IJkfPE1EKc)}<~akPB|aZKqP#6LhJ|R5JDkdp(je zXtr$!Z2HHe00r`%-1J-C#K2Go{%pvh7gP#RAYaCKGzh<(sEb&#CZQI!#Xx;P03n1c 
zkezi6;=8YA+eKy>u#P=TFoAubGE!N>b{V`&SJ&l`toZq!xtcV&d2u_c#X`P>VeHb)+aPb zREv@iLgw<-6haO|wn87J3q?}zfny+G+dy2F;WuUN$3xJ0K0gSLx&*1$8en`5%)H+j zZy($CLFJSKu=RlR>||4T50+d)^*FBX^MD>Q?*!`8FPQ`zqZ>P22y$1?LHS?L0I9Yb z5vJr`CvxvuzeX@2g}l>F-8p*zGN+6mc=o^TCJEwtD#Bz0==A53j)UmAS>U{~9RP`? zn$@jF>K2z?!uIg=ffWQvk(x%r0Wvd;zhzzY9vT|eB?qBaL=K2T*Nsf0D0@1Xh!guK zK0k@rxrXN62U?~mp984dD5_JSIF7p9#0)ZDECEf+DC#z_X>xG+;o~SQ=W;jN-CWFIssh=KIPu^dwz{}+#9mn3k4uOv}G2*9*#W-eQa+&umYn8=}vs4 zwU^H5%b-}W5}3S%dm1_b)h;HjMq}eh;z88L`JyVHP1wiO7k2{a0tkoH<%tgiU#?QP zC_lLX*0B4SmU%NOX-?5^Qc)AE4P+lc?v!eFgl(A)(mw$6B?DAvrB-94L0ty8xNho{ z@JqneJotp2IgaJ0igOv5G4jZmYUd*&)pF}8fZ=lEMoGOTDQOsaDzP18#FyJ&?S*WD zCLj58HVZPVsW|Gk9tW!;1{?3zn#gaKwI|!GJ%J#hUKtP^G9OJ4@8zbVznwi zZWB4Sd2}3{!}^KzJ-S-#^@P*NqSig0vbo}L$5%w5v1=2d#V~bA%n5 zWzj#i0aOLtX7Ipkz|)5nb=TNi>@lbV6mCOug&_5>${IxZ#X!BR5Ib>-=jDD#p9dB_ zOkdTK`*TSF3KCfN({(E)+0dz`%Wy1v5P+uSUl$Oo7x>H9>Dh^@kKs3~S#1uF2I;Mm zStl3;cB3gPJj+q+uNNYpZCn>L7J-LE`ou;0#H`c9iruPLMctxEgFC$~QHX46^}^DK zaFiC=0#7g;K@8vh#`RcO0JXKgs=oz>B=c{soiAMXG+bR=cdzEXG#wdpYcv573glPc zG_=wfp}aMlnve}DU&_9W9j8YBXEpS4W`JkF%>zLSD(<{80<5|m&X#o>uS@_3))k~B zXM!Xue;)0bH{C41v6JP5%pf)ERbQcT89s-g2Y8$fz;S|vO0GO3!bQ!4aa%{A4sn>8 zEIRM!!@rbXUk4ND7|LOX>N@}pzcMuI_W>;R?gjK9& zA(09}4j{GCp(WxofD`|4+!Q#29X;bkr-0Ttt*YYKdl_&|ofuv>_jzh3-7;PbAT=2m zMQ+o=Q2zULklS5d_Uq_KWdtK!(6<6`rRs7pjpD!x@Lr7IDXh=2=O|n`d!Y^lJmMoz zcr@;$I!8tokV05;au2P}f4349hWxVu{yvi@0gM>`8B-yb{J)^gu$`wef@4CL<%~Pxas{)c<<5xkNv6;y-9-N@e z45t!i3Y42#L-4>0FQS1`04xO>fq!rZ1^E}lDmQP(;=ZVbBV?W$Y~TA zu&_hsk(26)Pk}F^H7N;H*YfH_QM>QdRel#ZWtPFGZb4b{#KxWnZ2bKOrmdG5V|BoS zb(KfA$Y26UvQm;HbbSo3+>$`unbxAE0d+fB^o%tC=9{B}yw%A?JqXPN!m1@G{Ht`C zwWXzH$BQj9uRZiouD$76OyB1;Of_(bQ*G9$bkIz~PRcgijiUsoR5CI7XJ71Lfv)$# z^fT@X_!O=Hdgvw_#o7^6-$3?xK=zT?(e)IteZa)%5AaH%TuigLkwfY4i-vpj%swOr z#ZV4{$E_atMMLGk!&2i(Tp0x3YV>&!H^LWLB4JMb&nFqX_vrM%xXBHW4i(*EBd?%9 zNy0n;aZ~{SH8e)?2o$FRg{sei&#SgXK_Bg4?W6|Qp4;3!ai&oU?B!^#0y*l=8UPw$ 
zqRs*I52Shlg+k9m{*JIvm8AsT6b2DqMxt!CGw8GFKoLcUKxax7vlQqO_SezjxfX{+kL3KC@7~?B2&XMa&!<8a)eDP;T`}O={vjY;K0o3B^tuep=RF)zeboU2v(C>Cmor25X_ zJvGjf14;^LUj_ubdmUyr2Bw!#Dh<4c*97HCn^IPzQ*F`~4nhEOc;tWa?HF-RSRZV; zr*Y>1?S2z~u@n$rY7o-jLu~p~VNU0-lQ@3xU(()^5r!^10WwBij~}iCW(i1%5Oy`4 zIbM?v%4OeW68t(eqFsb*BJ?kETL1MgL3jVwd&mFc*-zkaI%nzuN5UsSDqf!}gi7b{ z1*o69Z*}rfKpS9vCyn){D}j2SX$yyRaqy5cRTLdm4tBPA6P$w74FFJ?KYl$w+M&Ag zJ|HTTfv@GFJfXSRAT{{W&X5gWZwJ^hu!!Sfn{O@!l2IVchsr@!ig^><0G-MB9}a+| zDGR)HA)z;)Pk~D&CE;Y>uG+ZxR?kisez9xt)|CL=vmdGOi-R8%TBSzKP2^TBSGN30 zfcg>2p5J8o{ny-mejC=P;z7`|`Z0)>Ha-dJiQsEHB7dhk!S6NNG^~IG`|x`3Q{-OI z?r#$eUiIIfkF2mK`R_+aDB^$q_4lH7zwm!QmHU4od;I^aM+yT7BqX{V48l*P?g4JV zF56-5*x!iXspwgP3uY~26TsoY*mWYUFls+EbC4#6G#=*ioPST+Efp?;+tcWv_2>WL zxIPoDT=pfXr*ZB9YS#4Msv!un8;>$_RPOA6OU~?as>A_+FBJcuOKrY1G&Hn~z|||G za$GB3w4NLtLdPwJp`ynln-^li%((^}vxX^52Rlcy<3>98jYo1%g@vXbb)f zpv?D5jxVD?pW{TbBdC6z#jX+hEzu4!A)xCIQT5%7xGwy-%sE|h0N1B#Zv@{eH)Pc# z-`%d(c-jjn?SC}-c5u|`kss8wL1QMXQbuLR zO#L3cu{=riSzCI^=BRWFf`CTPta3hB)#>hayxli)egh!jvn(fVhnwu(df^I>%#iX+ zjv1^@^Ba4_&EA;3++dpb^NDgm0;87=Pf2_t^t4M=BAIF|5cbsaUG=17ci%J1@;hbV zpp=8P_hR|j8^3l5zdGf4Y223aUB&S(#t}X=g0fZj05a!Lc6{W}bMV!~G4I;UgIzUl zDN^r`f=Zte^HPNm_VB)4EPL|v>klX6$`1r%Y<4wEZ`FshQ3~2;GZtx8YE~moBUiGW zAZRd>i$RT1>R6E9;GIdmd*#RCf{3wh7ap>+HWp170H7-kbfKleMx5MR#*&kq+o zF=2;ST4rle~LRO=J}jvFmgOM(s}O_ z4HM;$OYGY%>PRNjk@U7){oaF++XSuATL%EeLJD0?Lxkps?Oqw^6DNC)Im5N*Ha1}5 zLl2na6iXu6>hzK`2m5uSoM|jp8VtVOpJJEd{@sOJetKNedvq~IweRl>MAGgk2Dk<= zY;sho*|A*M89-scewuP7gHjaZeD;&hqq_1pc0fmEFz&2aR)vcn-7e2>TeuF-_Hi%$ z^=5BFzTh(Ro4u2gH3y29XuX{Zs-CA5S$VzAEm`b0m$q_o87Lb;#k6~{C;EOgCUdi z&drr?XhmjxgUUFfr1!c_pfC&Woa0|Dsy>v?m-!YR#%u9zt`_%PP2HuZJmb5LKuGSg z_eIAKqE!Q0onM)A-w#pI&t7KEWJtNF|8=7-z?x|CCcI2T=luk?NcHn1u zdG>JAp5x`XJEHpRV|q}pPFJuDz*$u~pW>e+1)j6>#)Lc%Rt*TI=U?jOpHbzrt8a7qiOxg5SldriK( z=d)uvmDJ)IL-!F4pX1jVz|I_{=0 zu(UVX)j6W0u+o)>W>WJ_&a%T&GI8zb^ns7~2P0rnYICvQ8{E6N$&L!fKCOb#D=!X2 zbR)Td8SO*e9!p64l;Zl@>Nt_V3>?C8Q6YS{0?!*9y95#~^T2ou=J-BTejUEwe1+DI 
z0u>9KxwuSg7Ou)ZqxP(`e5?A7)$~m96;tS4$y0AFegPNN@{ZAU?zA5-t7uE8*@@^g z7lB)k9U~jbFv8IJ(2DU_5?Vec@$R(`7a~qZo-!7G&gZ64@>6YYT-mRTR%i+3#sgY> zPjs%7&6R^|{YmM5x81c^d-{tn2plxw6A9W)EN4j z7W5{mnBY=*eG@)}g4x7vXKUpy=K5_t2~4ZzB|01QvLH0S^z#*$TIZv1MjgeX=~kD# z^5C^Og_2u!Qr<+vk;p|g{CroNC6Q?4(8$;7IugBhng5w|1kPg+e@%sLs99)UCZe{l z_6?i*(4yoDcc|}9m(aH1fX8AwHv}2!@vumY*!RC!m8`ngJAzrI9}3;e*odBx1!D#H z&y<5hd$hVQ^+eHq1Q`JX+fMzsAyDH8PlZjX_Dg5l-zb+TjmfLOY-DSU-VB!Zdi|VQ z-ud*5m*zxi#rb6bF@Cx1V6tzvpt#Sh4_8|KjcM6Q6YbeOZ=@ry!dsnGU_ALQmGo7; z>k53}x|Un|Q+2nbmR01uiwxY1asV}dSTU)W0M2|Cds^jqHZ2;L=1^6={Z#Y$o{o^M z>_{v_r;-$=*-kb@IE)9)E)@pA@*qI6H`&?l3;lV_*VrOY8JT7ZU(#ah)g3x_Ct%{W z@f(S#M#&X#3b8ZF8&oQHm&mP|ZXO92u!s#T{-81E5ts6CHjB@xMQ~`kk)LT`xqLnw zxCYmC;ARztTVP14Cr|;6B}6HuxSQE<=hHPfq8%a}O}vU}N^iGo<+h}K)&vk|=U6T-PdD#rKO zB}`N$l)G8)S)Zsy-(6aAk$D|-SG2WjRFtDu)lj&sc&Y9{gACj+^m1H6+|TYAWd*t4 zeB|25j8+g8U9K!nN#Ip#P?#Mo-F#tXTRW&CQzr)6H9<(I&zUrM&O#~*H~;Asr%LCB zkj2PV`{!}Gg^qKbx!`Jl1SMR(CTCFL{G(NE=Zui{vO%l7)XumO= z=cfykU5v{jji*hzE?h`W*RQlMH$<6Sn%HVt^*Ir6t?Nz2B~`R8nyEd@o=amkH-5^v zOIAT*k=~s0ZEJ_@ym}fPB2vGZTnY6Ta@^Jn42n(?Zg4N+=w7uIZZzWg^lIm(opwyi zDQ_p~Y?Cgf8mosNvi--# z2sb1YJIFu>y)%`qzff{SAz*ya(vjB66}T3jGBIA-7U!wjw>0fFVLY(p1D8G1K?T5l zXny>7IKD6GP6|tKG2Y~PXYO$KJC1|HoxkET1as6hc@4xl3LBFy;#OWN_gKswzL~_S zdb&1(_iTaooc;lmY^-pUGq=TbkK@fsD#&aAR61qW;cnN9qT^khym1cN0RthWZy8OH zdHk$hqw6zUtJ{n?_wM~Ir>v!@Nu06W^&a;zC@z&e6S1sR@3IJ1Nz1Z`qlYp!KD?5r z%`kX5=sRo>-X9%t^hKvbvW20n@<&_a&Q_Njaws$6!+%bxQ@pz_t2S?TkBBXEq}qBX z+Ff{N-Z{s6N(_vku~#q}`o_0r%H`l^wzh1Movo|sku=56mC`@j>$$n&%XVw2ZIO*LBE}Bh7$m20VPx(|G(-*EPwU(d?|kTP^2DxhC2o z9YpZ0zZ}eV%<9UgE!fn$#IDkOj@r~=@_rl+fxKYH_j4R?cQo4#Glm`lg0NMa5&oeVukCoa$3A)@fVkVhw z72*-BDv6?AntMFn%Rc>4^NcxIm-D&=zexN)&H;Q$!hhk<6GgH2Pw3A6jq01vX|-Y> za(n-pgV!%y;8( zl!=rQWHu3X7uhLUGIH3W<|#bQJMd)#Io@9J!#BO9yN^4+NwzL~$i>gBP6lW$Se6us z0JFL5q@r?qbF0jI@J@hjyx6^PY3F=f@0)ViB-7;r?+;P*N6PvgE#BFN-Ji}27J7_q zncprq0dvva$}V-&ixa%Y(%d;Da_i>_N7)Jar;Dl@scS#!`ZFu`t}@gSjaVulfU18YTJ`{Z=GMt}qX}%gsEO0dk-zW>`p$81gHVYr*Wu 
z*KhG~aN(OI@Vte3Cy(**vRp66C0 zC!36~E(0Etn8g#&JJ3ELq$D;Ib+leJ;Gg6xUQe_qsJanNDM^@=mc&XK*C-kO?M(G~ zrx>dpP!s?wsGAb}bR~9qLgSxZv@b^Ki-)r3zgB6p>WOh)W%@1|8t+jS#XB;TC0-c0 zJSfHr&d9iiR`jAdBInSCe>>JYsS#zQmu3$eoly3lVAQi)sgRqM@QGQOoI`#QO8n{Z zorzDyoe?i&v1QAsb>XLr4x7jY<)=~ZXK)WR_dMliDlD5thh#lhittMX6Cu;*>yLpxOfd46{}8Jm^9`YXQ@2|i66V>iLzpJ&Z=dJ=D>k7 zwQ8;tHU@5&8amxtiyVV8r$yT?jC;EdsJf0++tDJX_|q_di%nnZuY-EutWrZ!-0xsJMfG*{EJzwkrXbytnYg!j&!r=LLm zeW!x?ck(57SggJ09X(#7`SUiy7DT`6>*5sAG-JMe81=GFQ)flC7>{-j-*R-B0Q1HNq7Vw(B%k> zdy*7qUzs<3PQ_ek2yTQb3mXQsPMx}Z`0Zhn9FFI9A+ex2y|!8fs% z_W}?Z{X_O1VFQvc18)WD>B)4*aPZ1T9jn*U>M(9;V0)c>2IkyLvZe;>#|G*T^?q?b zj?Ljj)rE@m=|3TQrt#-F zQk69vL=x3Lwo>z8jmy}0sMSr!?-%1!$lozaB;(b-QS z#It#!CAg|UuK-iiVoE||3`?~<%J!&EYcwOu_CMCRxdnB-u+VID`$+l7?(tNHuOERS z3H`Krug&$GnpZmQ;cXmhUya3b7MQ&q^LH~_rsGZ}GdqQIXlQbJTE}t`4vmX9kK5(4 z(Tr#AeTEgb{WYV|jkwi%Hu8CfQ{s=_nty?L7#AXDttx6~4&j?u65g}f=USm0jg z7xMC-p@xzCNj*|$Wyd^)mfc*Pg-P^ldI=R4e;B0wR%LKM$DaxJgi5JeS%(6Kb6OtO zJN(>w{**vNqPbl>cd*WYYY;z0#4LscL?06JH-mRjl4b3)=g0h+{#M6|htzmiluHRHjH!i4AsFy!gM&4VaYtb?=Nx&2_T#HlQ_u;M|+1;z5+-8#TttYMqx#d{& zXtbNS&d2+}F_YK5B$&ohM|3Z_eDvB8{T}xCYeRfrdBrEw(YH#YmF+WUzr63UyWIZ| zfNc~%2l{m!qx+^n`dy0aJ0gm5?tnqFfFfF_s`{~#%`v+EWsmje23h+0%2eBoc5JjG zxsW46AdcoLo+SHlus1Xh5;X~;{UwJ3kKZihtuwbeRA|*v4!0Edo;nOp@Y)9s7d=s# zQE2ykn&YHIEYHK*u&E1?q@;^3Z$b!EYKWShBjdN@ovK1xwnXoMssBdj*0;eOKHZUi zL>#>iXKZ3R#hqIOI*n;DTm^z_LY{4|aL80wwn~_lEUA(MTvcP>vQgzGBv<(6bm$m} z2N^caZYpa>TW=_P&TEQdjiJ^Qov4S@(Xf%Lp;Lneo9H+n$q%^ap%*b~^qLxe1+!(? 
zZDLkW-XTDX0SJAVa(fvCQ0uAQWLOMjdtGm+G79kC3DLodUN8SmrqSWCI<->XyVn?Z znn@O-zOzStJV{KKM5cSQ!dw#&F3RD%inGkVk3;|-Fo8VRizO;4QQ~t|GhLw5K=w== zjJloVeaN4zsQ6HIU&%q-SayzQtn;5)HNlbcaSeKL=JhqSbB*~WBx4)(n&_oo&r7Z8 zkyvmZyRzU)NbettxH}7iGg#K=&>U*W6=jaMn6R7tnm#rFJ8M4{u7t}Y>^DLLKDhhT zY2n{d#1PVtXU?lS(?`rqOpv)jko(0Q4T5VYhHTx1Lvu52OLui>l7wt`oQk?IsIy(H zvB@Mb9Etw(axb&9xb4zoP(?vqvrsPZwvVQA;0@Iag~E z4jb@pCbN`j6E{pfFifg{4^5B$gSglwvAG0-xyv)MBxt6$4J7L;ev5>@H^&dYt-f$qcy?f7 z@1{C@AnFnoIwNf$Qdm2OV?fB8#vAM~cBKt%8I@7~&J;obS=jfNjgO}Q?w$@;c0s)K zQusD4)O_owD(4(M^eBZiD&Jawdy93|p!D?Rd1)j&7AUlgqRM+_1NI4r(6WElk59K5q>|I0^RMRRxPYf{afcz2)jSv@j$b1@X`AiIpqPjKcrRKVSVwMdqQ@P5H{*=M zF`opJSHDW`UwL}U$=&G%+wPL0rfn6nPliZ1n!Eij*8bba$7j(x^R06)pL}r386*j+ zyQi32k?x~#n6Ejdi>Zp#$8%=d+sSuQmxXS}j~!pl3|3y}uA|qty$)tX2=h-^5jpu~ z+Kc*Cb1IKW+y4kW)Zz_Xh^f_CmWh6=|Evf14bJ}rS|nYQte`e^J%&ShnI@s@_aW)G< zIBtSlStBm`9gse4h24z^iT5Jv@1i4%M6P`u&R69IT zYx^KlrM0$vl@$H?6QJt*}&&7@y;hc>EC?pa}eRZuPvqyRi6%U$Oz{u%^>KR)H;-VHxH!Uob^3^{#u=YCR5mov6Yne=DeRbw*$H~SG zKCDA*=asy#WPV%~)(lBQ3+~h;LfyH_r?}AqQuGGphq-fd-?8n474`%s-I~qX;idPo zb@I>-ZpaRwJXgJ#!G1|)g8>ZCi*|I&rUSP;JCS+d|erWyn?+ca~ENTSWrJlQ5 zAkN5sMYRyivmJ)((A3~!$fJ*VNm|J@B%XUGP!L>Tc33ZDG?$ZzvJ6HrW%QaH)Ekth zl38Q%)dIidYO@d@0IQ9Oj0EFFOL6UIPQ+fEfZ0VSLiao{cBng zS;=c9?ycSSxAmtE7;Tm=ObW>UUM`p4s3jpM47w2Ojg6U|6M2*;;T0xx;H{g{9Pq_B z66h;%dUY&S|M+MAU@RDBC8s6!elSnDFuqi`#=2&oInf^BcSNyos^||Gw9}2@aomCf zPZHWpqOyfU9*yhkb$Xy`k|e0i`Os~VnZ*kw$rPO;_Z&Dzy-hGp%S1TVJ{;~Xq4 zQ`Xy4XP}>(aBj^_{@%o*&S^N6NTb$NFIk4_h*?kq+`KcfUB(skj5EKce5X+jg*J5vkzL%QGv8kdR1|@wkpsYFq)dJ z(U=Q!uNJ^K3!_TWgiA^OK=!7R8}6rZvyeO&(XL|CZg$l;l;=&JMfy0rEFEE+Qs2)80OMr`td#-TH~Cu0$*2$| z_ji_aKPr+ks$a46eu2^oIT&dv|JR_wsD3ANb=4&~a0EYf{!qFA&++ zZFB}2bKUr-S6Ma27J6lli3X^^F+^a2??w_^91tdyAnuhfZ?SmXB~MAI#)Y&KREAe>z|Pxb?||71-Oj zc{=vTMz}71B{&~1Yi&kOddRu%C2#Q0f-!5eBm?cWRZciqR4$*K=zN z{(W8qdvZ{U(GlqW<)*5{^7#IEc)Y?)`GIc9zIuqj>Akzx;G>W_q{x}_YT1&Vn0b8! 
zKdl^#uBn<=YE^9y^IIbgj+-Ur2juA!Ga5IM6w2MG*MEacdqh^k8@!_vh=W#)_ym4d-v`9_Bm(bZw&r!Kx5LmE!Pj!x}l45q@ei%(cL;p=fgP1b3;& zKYP|PXK_C_CO={w8lZ#9!;ZO0dR{QkOj}qBcyzUin)eBm;G$G?@pcfX5n$&T`(_oX zjU52~jFdGI#IrXZ3qv9uMqxgWxrY`uoE3iv5ua?*%qZ`2#xWt|phW(KodK+Rm5HWv zDGS4fMe?M$FC|TNr9|t+9iEt{2$bI`B=jyOkRaLz3zeHfpHvelS0p)42@*>CT2%=F16 z&M|{ji56Ili2{yzEbZCB3v!xn&Iw!w;1e*jS{LksF_N z%kE#ZSSwG>tkG?1h9|5?H)gh#&;2@(vF77_tI{f^FCIS>K=+_6?VUfmw5H$C@l?!^_5^qaQainjI-_ExCX2{-ZCm6e(qSK|OG91^8;DpzRST*WCo#;E;H zT76aNYYMp^q`Qc2BZ_e1X8Xafohw-7=nnPU?cdzp(sVezKqg-y?PV`Dj?6;|$gwnF z_a?0azui&uosks8qgBLTc-jb#923~W@z&f7(8_lvjfWJi=Xii!k|5E;x8RthJXhK7k^kte)CoKRBpKt6C^&9|s ze9kzG$|X(U#$n^O%t!#Sfru)Zd8EGS{t7d&kz@aw%}@i9yTJ2VYXL(<&l<7f97KLg zMBWEu@3mSxcfqy4I%V+HRi%8|=ZI#1enCfeOU1mfxUX7pdF8@j!)ubixv0ZKJJ+`V z+fM+GMG}X|7ZqU&q{}-1fcc*Ext!^s`=V0j=@whm0dNquLaBKb3?*L*#n&qqW6}h8 zSfF0+Viwu@XB`;S$D6sH!3Jwp%*(0GwYzaumdcCDi%yS2q6i(#UHOv++DoKGH@97x zOU9_3C7r19J+oz1^9W>{p#!C`Hm?F^iJFzHx@dEn(uDv?o^J$s2oSUY(N(UT@c;wt zs~gGfZ_xhBa`tWW%6Yj}0j zfI-!NS}P=R{9pW~Jx5XhTv{>R<=UC3K>tYdt}FhVbS1)n+#h~-S@@+F zz{ySIs!&iXT>h@|4}26@m|N|-dN;prh(W@g{})#&{-XH*Fq!^`V*>p2{|{H`;?4CG YoqF0W`pFu5`JmWcb+o2jzIFe<0M3JKQvd(} literal 0 HcmV?d00001 diff --git a/tools/test/perf/CMakeLists.txt b/tools/test/perf/CMakeLists.txt index 4a1a8d0b1..0405adfcf 100644 --- a/tools/test/perf/CMakeLists.txt +++ b/tools/test/perf/CMakeLists.txt @@ -34,12 +34,14 @@ set(CUTLASS_PERF_TEST_HEADERS ) set(CUTLASS_PERF_TEST_SOURCES - cutlass_perf_test.cpp + cutlass_perf_test.cu gemm/sgemm.cu gemm/dgemm.cu gemm/hgemm.cu gemm/igemm.cu gemm/wmma_gemm.cu + gemm/wmma_binary_gemm.cu + gemm/wmma_integer_gemm.cu ) source_group("Source\ Files" FILES ${CUTLASS_PERF_TEST_SOURCES}) @@ -56,4 +58,6 @@ cutlass_add_executable( ${CUTLASS_PERF_TEST_SOURCES} ${CUTLASS_PERF_TEST_HEADERS} ) -CUDA_ADD_CUBLAS_TO_TARGET(cutlass_perf_test) + +target_link_libraries(cutlass_perf_test ${CUBLAS_LIBRARY}) + diff --git a/tools/test/perf/cutlass_perf_test.cpp b/tools/test/perf/cutlass_perf_test.cu 
similarity index 60% rename from tools/test/perf/cutlass_perf_test.cpp rename to tools/test/perf/cutlass_perf_test.cu index e77646d35..dee4c5afc 100644 --- a/tools/test/perf/cutlass_perf_test.cpp +++ b/tools/test/perf/cutlass_perf_test.cu @@ -27,19 +27,24 @@ \brief CUTLASS Performance Tests */ -#include -#include +#include +#include "tools/test/perf/performance_result.h" +#include "tools/test/perf/testbench_configs.h" +#include "tools/test/perf/testbench_options.h" +#include "tools/test/perf/testbench_output.h" + +#include "tools/test/perf/cutlass_perf_test.h" + +static std::vector GemmProfileFuncs; // // Profiling entry points defined in corresponding .cu files // namespace perf { -int profile_sgemm(TestbenchOutput &output, TestbenchOptions const &options); -int profile_dgemm(TestbenchOutput &output, TestbenchOptions const &options); -int profile_hgemm(TestbenchOutput &output, TestbenchOptions const &options); -int profile_igemm(TestbenchOutput &output, TestbenchOptions const &options); -int profile_wmma_gemm(TestbenchOutput &output, TestbenchOptions const &options); +void RegisterGemmProfileFunc(GemmProfileFunc * profileFunc) { + GemmProfileFuncs.push_back(profileFunc); +} } // namespace perf @@ -47,6 +52,22 @@ int profile_wmma_gemm(TestbenchOutput &output, TestbenchOptions const &options); // Executes profiling functionality // +template +int profile(int (**functions)(perf::TestbenchOutput &, + perf::TestbenchOptions const &, + perf::Config const &), + perf::TestbenchOutput &output, + perf::TestbenchOptions options, + int result) { + perf::TestbenchConfigs test_configs(options); + for (size_t j = 0; !result && j < test_configs.configs.size(); j++) { + for (size_t i = 0; !result && functions[i] != 0; ++i) { + result = (functions[i])(output, options, test_configs.configs[j]); + } + } + return result; +} + /// Entry point to CUTLASS performance test int main(int argc, const char **argv) { cutlass::CommandLine args(argc, argv); @@ -57,20 +78,17 @@ int main(int argc, 
const char **argv) { return 0; } - perf::TestbenchOutput output(options); - - int (*profile_gemm[])(perf::TestbenchOutput &, perf::TestbenchOptions const &) = { - perf::profile_sgemm, - perf::profile_dgemm, - perf::profile_hgemm, - perf::profile_igemm, - perf::profile_wmma_gemm, - 0}; - - int result = 0; - for (int i = 0; !result && profile_gemm[i]; ++i) { - result = (profile_gemm[i])(output, options); + if (args.check_cmd_line_flag("version")) { + perf::TestbenchOptions::version(std::cout); + std::cout << std::endl; + return 0; } - return result; + int result = 0; + + std::vector profileFuncs = GemmProfileFuncs; + profileFuncs.push_back(0); // Passing as array reference below, so need NULL termination. + perf::TestbenchOutput output_gemm(options); + result = profile(&profileFuncs[0], output_gemm, options, result); + return result; } diff --git a/tools/test/perf/cutlass_perf_test.h b/tools/test/perf/cutlass_perf_test.h new file mode 100644 index 000000000..70320740e --- /dev/null +++ b/tools/test/perf/cutlass_perf_test.h @@ -0,0 +1,44 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#pragma once + +#pragma diag_suppress boolean_controlling_expr_is_constant +#include +#pragma diag_warning boolean_controlling_expr_is_constant + +#include "tools/test/perf/testbench_output.h" +#include "tools/test/perf/gemm/gemm_profiler.h" + +namespace perf { + +typedef int (GemmProfileFunc)( + TestbenchOutput &output, + TestbenchOptions const &options, + Config const &config); + +void RegisterGemmProfileFunc(GemmProfileFunc*); + +} // perf diff --git a/tools/test/perf/gemm/bmma_gemm.cu b/tools/test/perf/gemm/bmma_gemm.cu new file mode 100644 index 000000000..147b5a4bb --- /dev/null +++ b/tools/test/perf/gemm/bmma_gemm.cu @@ -0,0 +1,121 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +/// \file {nv-internal-release} + +#if (defined(__CUDACC__) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 750)) +#pragma warning( disable : 4503) +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/bmma_gemm_traits.h" +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_profiler.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct BmmaGemmDispatch { + + typedef cutlass::gemm::Gemm Gemm; + + typedef typename Gemm::Params Params; + + /// Indicate warp-level GEMM + static bool const kThreadMultiplyAdd = false; + + static bool const kRunCuBLAS = false; + + static cutlass::MatrixLayout::Kind const kLayoutA = Traits::kLayoutA; + static cutlass::MatrixLayout::Kind const kLayoutB = Traits::kLayoutB; + + // + // Data members + // + + /// Params argument + Params params; + + // + // Methods + // + + BmmaGemmDispatch() {} + + /// Initializes params object + BmmaGemmDispatch(int m, int n, int k, int alpha, + cutlass::Vector const* d_a, int lda, + cutlass::Vector const* d_b, int ldb, int beta, + int const* d_c, int ldc, int* d_d, int ldd) { + + params.initialize(m, n, k * 32, alpha, d_a, lda, d_b, ldb, beta, d_c, ldc, d_d, ldd); + } + + /// Initializes params object + BmmaGemmDispatch(Params const& _params) : params(_params) {} + + /// Launches kernel + cudaError_t operator()() { return Gemm::launch(params); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace perf { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +int 
profile_bmma_gemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { + typedef perf::GemmProfiler, cutlass::Vector, int, int, int> GemmProfiler; + + int results = 0; + + { + + typedef cutlass::gemm::BmmaGemmTraits, + cutlass::Shape<1024, 32, 32>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor> + BmmaGemmTraits; + + typedef BmmaGemmDispatch Dispatch; + + results |= profile_gemm(output, "bmma_gemm_tn", options, config); + } + + return results; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +struct BmmaGemmRegistrar { + BmmaGemmRegistrar() { RegisterGemmProfileFunc(profile_bmma_gemm); } +}; + +volatile BmmaGemmRegistrar _BmmaGemmRegistrar; + +} // namespace perf + +#endif // if (defined(__CUDACC__) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 750) diff --git a/tools/test/perf/gemm/cublas_dispatch.h b/tools/test/perf/gemm/cublas_dispatch.h index 0679e5c8e..a30e3d96c 100644 --- a/tools/test/perf/gemm/cublas_dispatch.h +++ b/tools/test/perf/gemm/cublas_dispatch.h @@ -24,8 +24,8 @@ **************************************************************************************************/ #pragma once -#include -#include +#include "cutlass/matrix_traits.h" +#include "tools/util/type_traits.h" namespace perf { diff --git a/tools/test/perf/gemm/cutlass_dispatch.h b/tools/test/perf/gemm/cutlass_dispatch.h index 9c0f89a26..f6c85ba64 100644 --- a/tools/test/perf/gemm/cutlass_dispatch.h +++ b/tools/test/perf/gemm/cutlass_dispatch.h @@ -32,7 +32,8 @@ template + bool ThreadMultiplyAdd_, + bool RunCuBLAS_ = true> struct CutlassDispatch { typedef typename Gemm_::Params Params; typedef Gemm_ Gemm; @@ -45,6 +46,7 @@ struct CutlassDispatch { typedef ScalarEpilogue_ ScalarEpilogue; static bool const kThreadMultiplyAdd = ThreadMultiplyAdd_; + static bool const kRunCuBLAS = RunCuBLAS_; static cutlass::MatrixLayout::Kind const kLayoutA = Gemm::Traits::kLayoutA; static 
cutlass::MatrixLayout::Kind const kLayoutB = Gemm::Traits::kLayoutB; @@ -60,7 +62,7 @@ struct CutlassDispatch { // Methods // - CutlassDispatch() {} + // CutlassDispatch() {} /// Initializes params object CutlassDispatch(Index m, @@ -84,33 +86,6 @@ struct CutlassDispatch { /// Launches kernel cudaError_t operator()() { return Gemm::launch(params); } - - /// Determines if problem is aligned (assuming no padding) - static bool is_problem_aligned( - int m, - int n, - int k) { - - bool aligned = true; - - if (kLayoutA == cutlass::MatrixLayout::kColumnMajor) { - aligned = aligned && !(m % Gemm::Traits::GemmConfig::kScalarsPerLdgA); - } - else { - aligned = aligned && !(k % Gemm::Traits::GemmConfig::kScalarsPerLdgA); - } - - if (kLayoutB == cutlass::MatrixLayout::kColumnMajor) { - aligned = aligned && !(k % Gemm::Traits::GemmConfig::kScalarsPerLdgB); - } - else { - aligned = aligned && !(n % Gemm::Traits::GemmConfig::kScalarsPerLdgB); - } - - aligned = aligned && !(m % Gemm::Traits::GemmConfig::kScalarsPerLdgC); - - return aligned; - } }; /// Basic dispatcher inferred from GEMM traits diff --git a/tools/test/perf/gemm/dgemm.cu b/tools/test/perf/gemm/dgemm.cu index 7e9c16443..3f4b63b85 100644 --- a/tools/test/perf/gemm/dgemm.cu +++ b/tools/test/perf/gemm/dgemm.cu @@ -23,26 +23,29 @@ * **************************************************************************************************/ -#include -#include - -#include - -#include -#include +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/dgemm_traits.h" +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" +#include "tools/test/perf/gemm/gemm_profiler.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" +#pragma warning( disable : 4503) namespace perf { //////////////////////////////////////////////////////////////////////////////////////////////////// -int profile_dgemm(TestbenchOutput &output, TestbenchOptions const &options) { - +int profile_dgemm(TestbenchOutput 
&output, TestbenchOptions const &options, Config const &config) { typedef perf::GemmProfiler GemmProfiler; int results = 0; - - if (!results) { - + + // compute capability check + if (!options.compute_capability(6, 0)) { + return 0; + } + + { typedef cutlass::gemm::DgemmTraits< cutlass::MatrixLayout::kColumnMajor, cutlass::MatrixLayout::kRowMajor @@ -50,11 +53,10 @@ int profile_dgemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "dgemm_nt", options); + results |= profile_gemm(output, "dgemm_nt", options, config); } - if (!results) { - + { typedef cutlass::gemm::DgemmTraits< cutlass::MatrixLayout::kColumnMajor, cutlass::MatrixLayout::kColumnMajor @@ -62,11 +64,10 @@ int profile_dgemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "dgemm_nn", options); + results |= profile_gemm(output, "dgemm_nn", options, config); } - if (!results) { - + { typedef cutlass::gemm::DgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kColumnMajor @@ -74,11 +75,10 @@ int profile_dgemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "dgemm_tn", options); + results |= profile_gemm(output, "dgemm_tn", options, config); } - if (!results) { - + { typedef cutlass::gemm::DgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kRowMajor @@ -86,12 +86,18 @@ int profile_dgemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "dgemm_tt", options); + results |= profile_gemm(output, "dgemm_tt", options, config); } return results; } +struct DgemmRegistrar { + DgemmRegistrar() { RegisterGemmProfileFunc(profile_dgemm); } +}; + +volatile DgemmRegistrar _DgemmRegistrar; + 
//////////////////////////////////////////////////////////////////////////////////////////////////// } // namespace perf diff --git a/tools/test/perf/gemm/gemm_perf_testbed.h b/tools/test/perf/gemm/gemm_perf_testbed.h index f3766a637..27769b1c9 100644 --- a/tools/test/perf/gemm/gemm_perf_testbed.h +++ b/tools/test/perf/gemm/gemm_perf_testbed.h @@ -36,200 +36,35 @@ #include // Cutlass includes -#include -#include -#include -#include -#include -#include -#include +#include "tools/test/perf/gemm/cublas_dispatch.h" +#include "tools/test/perf/performance_result.h" +#include "tools/test/perf/testbench_options.h" +#include "tools/util/device_memory.h" +#include "tools/util/host_matrix.h" +#include "tools/util/reference/device/tensor_elementwise.h" +#include "tools/util/tensor_view_io.h" +#include "tools/util/type_traits.h" namespace perf { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Kernel to determine if two tensors are equal -template -__global__ void tensor_equals(int *result, - int dim_contiguous, - int dim_strided, - Type const *experimental, - int lde, - Type const *reference, - int ldr) { - typedef typename cutlass::TypeTraits::unsigned_type UnsignedType; +namespace detail { - int c_idx = blockIdx.x * blockDim.x + threadIdx.x; - int s_idx = blockIdx.y * blockDim.x; + template + struct ElementCount { + static int const kValue = 1; + }; - experimental += s_idx * lde + c_idx; - reference += s_idx * ldr + c_idx; + template + struct ElementCount > { + static int const kValue = Elements * ElementCount::kValue; + }; - for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { - if (s_idx < dim_strided && c_idx < dim_contiguous) { - UnsignedType exp = *reinterpret_cast(experimental); - UnsignedType ref = *reinterpret_cast(reference); - - if (exp != ref) { - *result = -1; - return; - } - - experimental += lde; - reference += ldr; - } - } -} +} // namespace detail 
//////////////////////////////////////////////////////////////////////////////////////////////////// -/// Kernel to initialize tensor to uniform distribution -template -__global__ void initialize_uniform( - Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { - __shared__ curandState_t rng_state[1024]; - - uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; - - curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); - - int c_idx = blockIdx.x * blockDim.x + threadIdx.x; - int s_idx = blockIdx.y * blockDim.x; - - tensor += s_idx * ldm + c_idx; - - for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { - if (s_idx < dim_strided && c_idx < dim_contiguous) { - double range = dist.uniform.max - dist.uniform.min; - - double rnd = curand_uniform(&rng_state[threadIdx.x]); - - rnd = dist.uniform.min + range * rnd; - - // Random values are cast to integer after scaling by a power of two to facilitate error - // testing - if (dist.int_scale >= 0) { - rnd = double(int(rnd * double(1 << dist.int_scale))); - *tensor = T(rnd / double(1 << dist.int_scale)); - } else { - *tensor = T(rnd); - } - - tensor += ldm; - } - } -} - -/// Kernel to initialize tensor to uniform distribution -template -__global__ void initialize_gaussian( - Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { - __shared__ curandState_t rng_state[1024]; - - uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; - - curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); - - int c_idx = blockIdx.x * blockDim.x + threadIdx.x; - int s_idx = blockIdx.y * blockDim.x; - - tensor += s_idx * ldm + c_idx; - - for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { - if (s_idx < dim_strided && c_idx < dim_contiguous) { - // Random values are cast to integer after scaling by a power of two to facilitate error - // testing - - double rnd = 
curand_normal(&rng_state[threadIdx.x]); - - rnd = dist.gaussian.mean + dist.gaussian.stddev * rnd; - - if (dist.int_scale >= 0) { - rnd = double(int(rnd * double(1 << dist.int_scale))); - *tensor = T(rnd / double(1 << dist.int_scale)); - } else { - *tensor = T(rnd); - } - } - } -} - -/// Kernel to initialize tensor to an identity matrix -template -__global__ void initialize_linear( - Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { - __shared__ curandState_t rng_state[1024]; - - uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; - - curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); - - int c_idx = blockIdx.x * blockDim.x + threadIdx.x; - int s_idx = blockIdx.y * blockDim.x; - - tensor += s_idx * ldm + c_idx; - - for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { - if (s_idx < dim_strided && c_idx < dim_contiguous) { - *tensor = - dist.linear.offset + dist.linear.delta_row * c_idx + dist.linear.delta_column * s_idx; - } - } -} - -/// Kernel to initialize tensor to an identity matrix -template -__global__ void initialize_identity( - Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { - __shared__ curandState_t rng_state[1024]; - - uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; - - curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); - - int c_idx = blockIdx.x * blockDim.x + threadIdx.x; - int s_idx = blockIdx.y * blockDim.x; - - tensor += s_idx * ldm + c_idx; - - for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { - if (s_idx < dim_strided && c_idx < dim_contiguous) { - *tensor = (c_idx == s_idx ? 
T(1) : T(0)); - } - } -} - -/// Dispatcher to appropriate initialization kernel -template -inline void initialize(Distribution const &dist, - int64_t seed, - int dim_contiguous, - int dim_strided, - T *tensor, - int ldm) { - dim3 block(256, 1, 1); - dim3 grid((dim_contiguous + block.x - 1) / block.x, (dim_strided + block.x - 1) / block.x); - - switch (dist.kind) { - case Distribution::Uniform: - initialize_uniform<<>>(dist, seed, dim_contiguous, dim_strided, tensor, ldm); - break; - case Distribution::Gaussian: - initialize_gaussian<<>>(dist, seed, dim_contiguous, dim_strided, tensor, ldm); - break; - case Distribution::Linear: - initialize_linear<<>>(dist, seed, dim_contiguous, dim_strided, tensor, ldm); - break; - case Distribution::Identity: - initialize_identity<<>>(dist, seed, dim_contiguous, dim_strided, tensor, ldm); - break; - default: - break; - } -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - /// Host-side implementation of performance testbed template class GemmTestbed { @@ -295,14 +130,13 @@ class GemmTestbed { /// Helper to resize a matrix with a given size and layout if needed template - static void resize_device_allocation( - cutlass::device_memory::allocation &tensor, - Distribution const &dist, - int64_t seed, - int rows, - int columns, - cutlass::MatrixLayout::Kind layout, - int ldm = 0) { + static void resize_device_allocation(cutlass::device_memory::allocation &tensor, + cutlass::Distribution const &dist, + int64_t seed, + int rows, + int columns, + cutlass::MatrixLayout::Kind layout, + int ldm = 0) { if (!ldm) { ldm = (layout == cutlass::MatrixLayout::kColumnMajor ? rows : columns); } @@ -315,65 +149,79 @@ class GemmTestbed { int c_dim = (layout == cutlass::MatrixLayout::kColumnMajor ? rows : columns); int s_dim = (layout == cutlass::MatrixLayout::kColumnMajor ? 
columns : rows); - initialize(dist, seed, c_dim, s_dim, tensor.get(), ldm); + cutlass::TensorView view( + tensor.get(), + cutlass::make_Coord(ldm, 1), + cutlass::make_Coord(s_dim, c_dim)); + + cutlass::reference::device::TensorInitialize(view, seed, dist); } } /// Resizes each tensor void resize_helper(GemmProblem const &problem) { - resize_device_allocation( - A, - initial_distribution.dist_A, - initial_distribution.seed, - problem.m, - problem.k, - problem.layout_A); + resize_device_allocation(A, + initial_distribution.dist_A, + initial_distribution.seed, + problem.m, + problem.k, + problem.layout_A); resize_device_allocation( - B, - initial_distribution.dist_B, - initial_distribution.seed + 17, // compute distinct value from initial seed - problem.k, - problem.n, - problem.layout_B); + B, + initial_distribution.dist_B, + initial_distribution.seed + 17, // compute distinct value from initial seed + problem.k, + problem.n, + problem.layout_B); resize_device_allocation( - C_initial, - initial_distribution.dist_C, - initial_distribution.seed + 101, // compute distinct value from initial seed - problem.m, - problem.n, - cutlass::MatrixLayout::kColumnMajor); + C_initial, + initial_distribution.dist_C, + initial_distribution.seed + 101, // compute distinct value from initial seed + problem.m, + problem.n, + cutlass::MatrixLayout::kColumnMajor); - resize_device_allocation( - reference, Distribution(), 0, problem.m, problem.n, cutlass::MatrixLayout::kColumnMajor); + resize_device_allocation(reference, + cutlass::Distribution(), + 0, + problem.m, + problem.n, + cutlass::MatrixLayout::kColumnMajor); - resize_device_allocation( - experimental, Distribution(), 0, problem.m, problem.n, cutlass::MatrixLayout::kColumnMajor); + resize_device_allocation(experimental, + cutlass::Distribution(), + 0, + problem.m, + problem.n, + cutlass::MatrixLayout::kColumnMajor); } /// Functor to print errors struct PrintErrors { - /// Equivalently sized integer type typedef typename 
cutlass::TypeTraits::integer_type integer_t; + /// Performance testbench defined for a TensorView of rank-2 contiguous matrices + typedef cutlass::TensorView MatrixView; + /// Output stream to write to - std::ostream& out; + std::ostream &out; /// Reference tensor view - cutlass::HostTensorView const& reference; + MatrixView const &reference; /// Computed tensor view - cutlass::HostTensorView const& experimental; + MatrixView const &experimental; /// Errors greater than or this amount result in printing integer_t ulps_threshold; /// - PrintErrors(std::ostream& _out, - cutlass::HostTensorView const& _reference, - cutlass::HostTensorView const& _experimental, + PrintErrors(std::ostream &_out, + MatrixView const &_reference, + MatrixView const &_experimental, integer_t _ulps_threshold = 1) : out(_out), reference(_reference), @@ -381,18 +229,15 @@ class GemmTestbed { ulps_threshold(_ulps_threshold) {} /// Compares one element - void operator()( - CType const& element, - typename cutlass::HostTensorView::Coord_t coord) { - + void operator()(CType const &element, typename MatrixView::TensorCoord coord) { CType exp = experimental.at(coord); CType ref = reference.at(coord); int64_t int_exp = 0; int64_t int_ref = 0; - *reinterpret_cast(&int_exp) = exp; - *reinterpret_cast(&int_ref) = ref; + *reinterpret_cast(&int_exp) = exp; + *reinterpret_cast(&int_ref) = ref; integer_t ulps = integer_t(int_exp - int_ref); @@ -405,11 +250,10 @@ class GemmTestbed { relative /= double(ref); } - out << "[" << coord << "] expected: " << ref << " (0x" - << std::hex << std::setw(width) << std::setfill('0') << integer_t(int_ref) << std::dec - << ")" - << ", got: " << exp << " (0x" << std::hex - << std::setw(width) << std::setfill('0') << integer_t(int_exp) << std::dec << ")" + out << "[" << coord << "] expected: " << ref << " (0x" << std::hex << std::setw(width) + << std::setfill('0') << integer_t(int_ref) << std::dec << ")" + << ", got: " << exp << " (0x" << std::hex << std::setw(width) << 
std::setfill('0') + << integer_t(int_exp) << std::dec << ")" << " relative error: " << relative << ", ulps: " << ulps << "\n"; } } @@ -497,7 +341,7 @@ class GemmTestbed { /// Returns the number of flops implied by the computation (1 multiply-accumulate = 2 flops) uint64_t flops() const { - return uint64_t(problem.m) * uint64_t(problem.n) * uint64_t(problem.k) * 2ULL; + return uint64_t(problem.m) * uint64_t(problem.n) * uint64_t(problem.k) * detail::ElementCount::kValue * 2ULL; } /// Computes the speed of the computation in GFLOPs/s @@ -555,25 +399,17 @@ class GemmTestbed { /// Verifies the 'test' tensor with 'ref' bool verify(TensorC const &test, TensorC const &ref) { - cutlass::device_memory::allocation flag_device(1); - int flag = 0; - cutlass::device_memory::copy_to_device(flag_device.get(), &flag, 1); - - dim3 block(256, 1, 1); - dim3 grid((problem.m + block.x - 1) / block.x, (problem.n + block.x - 1) / block.x); - - tensor_equals<<>>(flag_device.get(), - problem.m, - problem.n, - experimental.get(), - problem.m, - reference.get(), - problem.m); - - cutlass::device_memory::copy_to_host(&flag, flag_device.get(), 1); - - return flag == 0; + return cutlass::reference::device::TensorEquals( + cutlass::TensorView( + test.get(), + cutlass::make_Coord(problem.m, 1), + cutlass::make_Coord(problem.n, problem.m)), + cutlass::TensorView( + ref.get(), + cutlass::make_Coord(problem.m, 1), + cutlass::make_Coord(problem.n, problem.m)) + ); } /// Computes the reference output @@ -587,12 +423,11 @@ class GemmTestbed { /// Writes the problem to an ostream in human-readable form void write_problem(std::ostream &results_output, std::ostream &errors_output) { - - cutlass::HostTensor host_A; - cutlass::HostTensor host_B; - cutlass::HostTensor host_C; - cutlass::HostTensor host_D; - cutlass::HostTensor host_Ref; + cutlass::HostMatrix host_A; + cutlass::HostMatrix host_B; + cutlass::HostMatrix host_C; + cutlass::HostMatrix host_D; + cutlass::HostMatrix host_Ref; 
host_A.resize_matrix(M(), K(), layout_a()); host_B.resize_matrix(K(), N(), layout_b()); @@ -608,11 +443,16 @@ class GemmTestbed { host_Ref.copy_to_host(ptr_reference()); // write out human readable - results_output << "A =\n" << host_A << "\n" - << "B =\n" << host_B << "\n" - << "C = \n" << host_C << "\n" - << "Ref =\n" << host_Ref << "\n" - << "Experimental =\n" << host_D << "\n"; + results_output << "A =\n" + << host_A << "\n" + << "B =\n" + << host_B << "\n" + << "C = \n" + << host_C << "\n" + << "Ref =\n" + << host_Ref << "\n" + << "Experimental =\n" + << host_D << "\n"; // write out list of errors PrintErrors printer(errors_output, host_Ref, host_D); diff --git a/tools/test/perf/gemm/gemm_profiler.h b/tools/test/perf/gemm/gemm_profiler.h index 30eecff6d..6cdb07b94 100644 --- a/tools/test/perf/gemm/gemm_profiler.h +++ b/tools/test/perf/gemm/gemm_profiler.h @@ -29,16 +29,18 @@ #include #include -#if defined(WIN32) +#include "cutlass/util/platform.h" +#if defined(CUTLASS_OS_WINDOWS) #include #else // needed for sleep #include #endif -#include -#include -#include +#include "tools/test/perf/gemm/gemm_perf_testbed.h" +#include "tools/test/perf/testbench_configs.h" +#include "tools/test/perf/testbench_options.h" +#include "tools/test/perf/testbench_output.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -63,17 +65,23 @@ class GemmProfiler { // /// Reference to TestbenchOutput instance - TestbenchOutput &output; + TestbenchOutput &output; /// Reference to options object TestbenchOptions const &options; + // Reference to config object + Config const &config; + /// Performance test environment PerfTestbed testbed; /// Kernel name std::string kernel_name; + /// Cutlass algorithm + std::string cutlass_algo; + /// Timing events cudaEvent_t events[2]; @@ -93,14 +101,17 @@ class GemmProfiler { // /// Constructs performance testebed - GemmProfiler(TestbenchOutput &_output, + GemmProfiler(TestbenchOutput &_output, 
std::string const &_kernel_name, - TestbenchOptions const &_options) + std::string const &_cutlass_algo, + TestbenchOptions const &_options, + Config const &_config) : output(_output), options(_options), + config(_config), kernel_name(_kernel_name), + cutlass_algo(_cutlass_algo), testbed(_options.initial_distribution) { - for (int i = 0; i < 2; ++i) { cudaError_t result = cudaEventCreate(&events[i]); if (result != cudaSuccess) { @@ -112,34 +123,47 @@ class GemmProfiler { ~GemmProfiler() {} /// Writes the workspace to text files - void write_problem(std::string const &kernel_name) { + void write_problem(Provider::Kind provider, std::string const &kernel_name) { + std::stringstream base_filename; - std::stringstream base_filename; + base_filename << provider << "_" << kernel_name << "_" << testbed.M() << "x" << testbed.N() + << "x" << testbed.K(); - base_filename - << kernel_name << "_" - << testbed.M() << "x" << testbed.N() << "x" << testbed.K(); + std::string results_name = base_filename.str() + "_results.txt"; + std::string errors_name = base_filename.str() + "_errors.txt"; - std::string results_name = base_filename.str() + "_results.txt"; - std::string errors_name = base_filename.str() + "_errors.txt"; - - std::ofstream results(results_name.c_str()); - std::ofstream errors(errors_name.c_str()); - testbed.write_problem(results, errors); + std::ofstream results(results_name.c_str()); + std::ofstream errors(errors_name.c_str()); + testbed.write_problem(results, errors); } /// Profiles Cutlass template - PerformanceResult execute_cutlass(GemmProblem const &problem, cublasGemmAlgo_t algorithm) { - PerformanceResult result(kernel_name, problem); + PerformanceResult execute_cutlass(GemmProblem const &problem, + cublasGemmAlgo_t algorithm) { + PerformanceResult result( + Provider::Cutlass + , kernel_name + , problem + ); - testbed.compute_reference(algorithm); - - if (cudaDeviceSynchronize() != cudaSuccess) { - result.disposition = Disposition::NotVerified; + if 
(options.dry_run) { + result.disposition = Disposition::NotRun; return result; } + if (CutlassDispatch::kRunCuBLAS) { + testbed.compute_reference(algorithm); + + if (cudaDeviceSynchronize() != cudaSuccess) { + result.disposition = Disposition::NotVerified; + return result; + } + } + else { + result.disposition = Disposition::Passed; + } + CutlassDispatch dispatch(testbed.M(), testbed.N(), testbed.K(), @@ -161,14 +185,16 @@ class GemmProfiler { return result; } - if (testbed.verify_with_reference()) { - result.disposition = Disposition::Passed; - } else { - result.disposition = Disposition::Incorrect; + if (CutlassDispatch::kRunCuBLAS) { + if (testbed.verify_with_reference()) { + result.disposition = Disposition::Passed; + } else { + result.disposition = Disposition::Incorrect; + } } if (options.save_workspace(result.disposition == Disposition::Passed)) { - write_problem(kernel_name); + write_problem(Provider::Cutlass, kernel_name); } if (cudaDeviceSynchronize() != cudaSuccess) { @@ -212,30 +238,38 @@ class GemmProfiler { result.gflops = testbed.GFLOPs_per_sec(result.runtime); if (result.disposition != Disposition::Passed) { - std::cout << kernel_name << " failed with disposition: " << result.disposition; + std::cout << "[\033[1;31mFAILED\033[0m]: " << kernel_name + << " failed with disposition: " << result.disposition << "\n"; } return result; } + template + bool contains(T const &container, F const &val) { + return std::find(container.begin(), container.end(), val) != container.end(); + } + /// Executes all kernels for this problem size template - std::vector execute(GemmProblem const &problem) { + std::vector > execute(GemmProblem const &problem) { // New problem size output.begin_problem(); - cublasGemmAlgo_t algorithm = - (CutlassDispatch::kThreadMultiplyAdd ? CUBLAS_GEMM_DEFAULT : CUBLAS_GEMM_DEFAULT_TENSOR_OP); + bool const tensor_op = !(CutlassDispatch::kThreadMultiplyAdd); + cublasGemmAlgo_t algorithm = tensor_op ? 
+ CUBLAS_GEMM_DEFAULT_TENSOR_OP : CUBLAS_GEMM_DEFAULT; testbed.resize(problem); - std::vector results; - - results.push_back(execute_cutlass(problem, algorithm)); + std::vector > results; + results.push_back(execute_cutlass(problem, algorithm)); // cool-down period - pause(2); + if (!options.dry_run) { + pause(options.sleep_time); + } return results; } @@ -243,25 +277,20 @@ class GemmProfiler { /// Runs the test and collects performance for all results template void schmoo(Range const &M, Range const &N, Range const &K) { - for (int m = M.start; m <= M.end; m += M.increment) { - for (int n = N.start; n <= N.end; n += N.increment) { - for (int k = K.start; k <= K.end; k += K.increment) { + for (int m = M.start; m <= M.end; m = M.next(m)) { + for (int n = N.start; n <= N.end; n = N.next(n)) { + for (int k = K.start; k <= K.end; k = K.next(k)) { - // Avoid evaluating problem if problem size does not satisfy alignment - if (!CutlassDispatch::is_problem_aligned(m, n, k)) { - continue; - } - - std::vector results = + std::vector > results = execute(GemmProblem(m, n, k, CutlassDispatch::kLayoutA, CutlassDispatch::kLayoutB, - options.alpha, - options.beta)); + config.alpha, + config.beta)); - for (std::vector::const_iterator it = results.begin(); + for (std::vector >::const_iterator it = results.begin(); it != results.end(); ++it) { output.append(*it); @@ -274,46 +303,53 @@ class GemmProfiler { /// Runs the test over the problem space and reports only the best performance template void peak(Range const &M, Range const &N, Range const &K) { + typedef std::map > ProviderPerformanceMap; - PerformanceResult max_perf; - bool first_result = true; + ProviderPerformanceMap max_perf; - for (int m = M.start; m <= M.end; m += M.increment) { - for (int n = N.start; n <= N.end; n += N.increment) { - for (int k = K.start; k <= K.end; k += K.increment) { - - // Avoid evaluating problem if problem size does not satisfy alignment - if (!CutlassDispatch::is_problem_aligned(m, n, k)) { - 
continue; - } - - std::vector results = + for (int m = M.start; m <= M.end; m += M.next(m)) { + for (int n = N.start; n <= N.end; n += N.next(n)) { + for (int k = K.start; k <= K.end; k += K.next(k)) { + std::vector > results = execute(GemmProblem(m, n, k, CutlassDispatch::kLayoutA, CutlassDispatch::kLayoutB, - options.alpha, - options.beta)); + config.alpha, + config.beta)); - for (std::vector::const_iterator it = results.begin(); + for (std::vector >::const_iterator it = results.begin(); it != results.end(); ++it) { - /// Writes the output without appending it output.pretty_print(*it); - /// Updates maximum performing kernel - if (first_result || max_perf.gflops > it->gflops) { - max_perf = *it; + if (it->disposition == Disposition::Passed) { + /// Updates maximum performing kernel + ProviderPerformanceMap::iterator max_perf_it = max_perf.find(it->provider); + + if (max_perf_it == max_perf.end()) { + max_perf.insert(std::make_pair(it->provider, *it)); + } else if (max_perf_it->second.gflops < it->gflops) { + max_perf_it->second = *it; + } } - first_result = false; } } } } - output.append(max_perf); + Provider::Kind providers[] = { + Provider::Cutlass, + Provider::Invalid + }; + for (int i = 0; providers[i] != Provider::Invalid; ++i) { + ProviderPerformanceMap::const_iterator it = max_perf.find(providers[i]); + if (it != max_perf.end()) { + output.append(it->second); + } + } } }; @@ -321,17 +357,19 @@ class GemmProfiler { /// Dispatches to GEMM performance profiler template -int profile_gemm(TestbenchOutput &output, +int profile_gemm(TestbenchOutput &output, std::string const &kernel, - TestbenchOptions const &options) { - if (options.kernel_enabled(kernel)) { - GemmProfiler perf(output, kernel, options); + TestbenchOptions const &options, + Config const &config, + std::string const &cutlass_algo = "") { + if (config.kernel_enabled(kernel)) { + GemmProfiler perf(output, kernel, cutlass_algo, options, config); if (options.peak_performance) { perf.template peak( - 
options.problem_range.M, options.problem_range.N, options.problem_range.K); + config.problem_range.M, config.problem_range.N, config.problem_range.K); } else { perf.template schmoo( - options.problem_range.M, options.problem_range.N, options.problem_range.K); + config.problem_range.M, config.problem_range.N, config.problem_range.K); } } diff --git a/tools/test/perf/gemm/hgemm.cu b/tools/test/perf/gemm/hgemm.cu index e7e9af7fa..5b47e66dd 100644 --- a/tools/test/perf/gemm/hgemm.cu +++ b/tools/test/perf/gemm/hgemm.cu @@ -22,62 +22,62 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include - -#include -#include - -//////////////////////////////////////////////////////////////////////////////////////////////////// +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/hgemm_traits.h" +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" +#include "tools/test/perf/gemm/gemm_profiler.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" +#pragma warning( disable : 4503) namespace perf { //////////////////////////////////////////////////////////////////////////////////////////////////// -int profile_hgemm(TestbenchOutput &output, TestbenchOptions const &options) { - +int profile_hgemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { typedef perf::GemmProfiler< - cutlass::half_t, - cutlass::half_t, - cutlass::half_t, - cutlass::half_t, + cutlass::half_t, + cutlass::half_t, + cutlass::half_t, + cutlass::half_t, cutlass::half_t> GemmProfiler; int results = 0; - if (!results) { - - typedef cutlass::gemm::HgemmTraits< - cutlass::MatrixLayout::kColumnMajor, - cutlass::MatrixLayout::kRowMajor, - cutlass::Shape<8, 128, 128> - > - GemmTraits; - - typedef typename CutlassDispatchBasic::Dispatch Dispatch; - - profile_gemm(output, "hgemm_nt", options); + // 
compute capability check + if (!options.compute_capability(6, 0)) { + return 0; } - if (!results) { - + { typedef cutlass::gemm::HgemmTraits< cutlass::MatrixLayout::kColumnMajor, - cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 128, 128> > GemmTraits; typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "hgemm_nn", options); + results |= profile_gemm(output, "hgemm_nt", options, config); } - if (!results) { - + { + typedef cutlass::gemm::HgemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::Shape<8, 128, 128> + > + GemmTraits; + + typedef typename CutlassDispatchBasic::Dispatch Dispatch; + + results |= profile_gemm(output, "hgemm_nn", options, config); + } + + { typedef cutlass::gemm::HgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kColumnMajor, @@ -87,11 +87,10 @@ int profile_hgemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "hgemm_tn", options); + results |= profile_gemm(output, "hgemm_tn", options, config); } - if (!results) { - + { typedef cutlass::gemm::HgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kRowMajor, @@ -101,13 +100,18 @@ int profile_hgemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "hgemm_tt", options); + results |= profile_gemm(output, "hgemm_tt", options, config); } return results; } +struct HgemmRegistrar { + HgemmRegistrar() { RegisterGemmProfileFunc(profile_hgemm); } +}; + +volatile HgemmRegistrar _HgemmRegistrar; + //////////////////////////////////////////////////////////////////////////////////////////////////// } // namespace perf - diff --git a/tools/test/perf/gemm/igemm.cu b/tools/test/perf/gemm/igemm.cu index 879ee0abf..24d721a91 100644 --- a/tools/test/perf/gemm/igemm.cu +++ 
b/tools/test/perf/gemm/igemm.cu @@ -23,24 +23,31 @@ * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/igemm_traits.h" +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" +#include "tools/test/perf/gemm/gemm_profiler.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" + +#pragma warning( disable : 4503) namespace perf { //////////////////////////////////////////////////////////////////////////////////////////////////// -int profile_igemm(TestbenchOutput &output, TestbenchOptions const &options) { +int profile_igemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { typedef perf::GemmProfiler GemmProfiler; + // compute capability check + if (!options.compute_capability(6, 1)) { + return 0; + } + int results = 0; - - if (!results) { - + + { typedef cutlass::gemm::IgemmTraits< cutlass::MatrixLayout::kColumnMajor, cutlass::MatrixLayout::kRowMajor @@ -48,11 +55,10 @@ int profile_igemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "igemm_nt", options); + results |= profile_gemm(output, "igemm_nt", options, config); } - if (!results) { - + { typedef cutlass::gemm::IgemmTraits< cutlass::MatrixLayout::kColumnMajor, cutlass::MatrixLayout::kColumnMajor @@ -60,11 +66,10 @@ int profile_igemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "igemm_nn", options); + results |= profile_gemm(output, "igemm_nn", options, config); } - if (!results) { - + { typedef cutlass::gemm::IgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kColumnMajor @@ -72,11 +77,10 @@ int profile_igemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef 
typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "igemm_tn", options); + results |= profile_gemm(output, "igemm_tn", options, config); } - if (!results) { - + { typedef cutlass::gemm::IgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kRowMajor @@ -84,12 +88,62 @@ int profile_igemm(TestbenchOutput &output, TestbenchOptions const &options) { typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "igemm_tt", options); + results |= profile_gemm(output, "igemm_tt", options, config); + } + + { + typedef cutlass::gemm::IgemmTraits, int, + cutlass::gemm::LinearScaling, cutlass::Shape<32, 8, 4> > GemmTraits; + + typedef typename CutlassDispatchBasic::Dispatch Dispatch; + + results |= profile_gemm(output, "igemm_32x32x128_nn", + options, config); + } + + { + typedef cutlass::gemm::IgemmTraits, int, + cutlass::gemm::LinearScaling, cutlass::Shape<32, 8, 4> > GemmTraits; + + typedef typename CutlassDispatchBasic::Dispatch Dispatch; + + results |= profile_gemm(output, "igemm_32x32x128_nt", + options, config); + } + + { + typedef cutlass::gemm::IgemmTraits, int, + cutlass::gemm::LinearScaling, cutlass::Shape<32, 8, 4> > GemmTraits; + + typedef typename CutlassDispatchBasic::Dispatch Dispatch; + + results |= profile_gemm(output, "igemm_32x32x128_tn", + options, config); + } + + { + typedef cutlass::gemm::IgemmTraits, int, + cutlass::gemm::LinearScaling, cutlass::Shape<32, 8, 4> > GemmTraits; + + typedef typename CutlassDispatchBasic::Dispatch Dispatch; + + results = profile_gemm(output, "igemm_32x32x128_tt", + options, config); } return results; } +struct IgemmRegistrar { + IgemmRegistrar() { RegisterGemmProfileFunc(profile_igemm); } +}; + +volatile IgemmRegistrar _IgemmRegistrar; + //////////////////////////////////////////////////////////////////////////////////////////////////// } // namespace perf diff --git a/tools/test/perf/gemm/sgemm.cu b/tools/test/perf/gemm/sgemm.cu index cdbbe3d57..1448ae0d4 100644 --- 
a/tools/test/perf/gemm/sgemm.cu +++ b/tools/test/perf/gemm/sgemm.cu @@ -22,80 +22,96 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include - -#include -#include +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" +#include "tools/test/perf/gemm/gemm_profiler.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" +#pragma warning( disable : 4503) namespace perf { //////////////////////////////////////////////////////////////////////////////////////////////////// -int profile_sgemm(TestbenchOutput &output, TestbenchOptions const &options) { +template +int profile_sgemm_kernel( + TestbenchOutput &output, + TestbenchOptions const &options, + Config const &config, + std::string const &name, + std::string const &algo) { typedef perf::GemmProfiler SGemmProfiler; int results = 0; - if (!results) { - + { typedef cutlass::gemm::SgemmTraits< cutlass::MatrixLayout::kColumnMajor, cutlass::MatrixLayout::kRowMajor, - cutlass::Shape<8, 128, 128> + OutputTile > GemmTraits; typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "sgemm_nt", options); + results |= profile_gemm(output, name + "_nt", options, config, algo); } - if (!results) { - + { typedef cutlass::gemm::SgemmTraits< cutlass::MatrixLayout::kColumnMajor, cutlass::MatrixLayout::kColumnMajor, - cutlass::Shape<8, 128, 128> + OutputTile > GemmTraits; typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "sgemm_nn", options); + results |= profile_gemm(output, name + "_nn", options, config, algo); } - if (!results) { - + { typedef cutlass::gemm::SgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kColumnMajor, - cutlass::Shape<8, 128, 128> + OutputTile > GemmTraits; typedef typename 
CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "sgemm_tn", options); + results |= profile_gemm(output, name + "_tn", options, config, algo); } - if (!results) { - + { typedef cutlass::gemm::SgemmTraits< cutlass::MatrixLayout::kRowMajor, cutlass::MatrixLayout::kRowMajor, - cutlass::Shape<8, 128, 128> + OutputTile > GemmTraits; typedef typename CutlassDispatchBasic::Dispatch Dispatch; - profile_gemm(output, "sgemm_tt", options); + results |= profile_gemm(output, name + "_tt", options, config, algo); } + return results; +} + +/// Profiles all SGEMM tile sizes +int profile_sgemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { + int results = 0; + + results |= profile_sgemm_kernel >(output, options, config, "sgemm", "128x128"); return results; } +struct SgemmRegistrar { + SgemmRegistrar() { RegisterGemmProfileFunc(profile_sgemm); } +}; + +volatile SgemmRegistrar _SgemmRegistrar; + //////////////////////////////////////////////////////////////////////////////////////////////////// } // namespace perf - diff --git a/tools/test/perf/gemm/wmma_binary_gemm.cu b/tools/test/perf/gemm/wmma_binary_gemm.cu new file mode 100644 index 000000000..4ae236f4d --- /dev/null +++ b/tools/test/perf/gemm/wmma_binary_gemm.cu @@ -0,0 +1,149 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_profiler.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" + +#include "cutlass/wmma_matrix.h" +#ifdef CUTLASS_USE_WMMA_API +#ifdef CUTLASS_USE_SUBBYTE_WMMA + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/wmma_gemm_traits.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaBinaryGemmDispatch { + + typedef cutlass::gemm::Gemm Gemm; + + typedef typename Gemm::Params Params; + + /// Indicate warp-level GEMM + static bool const kThreadMultiplyAdd = false; + + static bool const kRunCuBLAS = false; + + static cutlass::MatrixLayout::Kind const kLayoutA = Traits::kLayoutA; + 
static cutlass::MatrixLayout::Kind const kLayoutB = Traits::kLayoutB; + + // + // Data members + // + + /// Params argument + Params params; + + // + // Methods + // + + WmmaBinaryGemmDispatch() {} + + /// Initializes params object + WmmaBinaryGemmDispatch(int m, int n, int k, int alpha, + cutlass::Vector const* d_a, int lda, + cutlass::Vector const* d_b, int ldb, int beta, + int const* d_c, int ldc, int* d_d, int ldd) { + + params.initialize(m, n, k * 32, alpha, d_a, lda, d_b, ldb, beta, d_c, ldc, d_d, ldd); + } + + /// Initializes params object + WmmaBinaryGemmDispatch(Params const& _params) : params(_params) {} + + /// Launches kernel + cudaError_t operator()() { return Gemm::launch(params); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace perf { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +int profile_wmma_binary_gemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { + typedef perf::GemmProfiler, cutlass::Vector, int, int, int> GemmProfiler; + + int results = 0; + + // compute capability check + if (!options.compute_capability_exact(7, 5)) { + return 0; + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<1024, 32, 64>, + cutlass::Shape<128, 8, 8>, + 128, + 128> + WmmaGemmTraits; + + typedef WmmaBinaryGemmDispatch Dispatch; + + results |= profile_gemm(output, "wmma_binary_gemm_tn", options, config); + } + + return results; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace perf + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#else // ! 
CUTLASS_USE_SUBBYTE_WMMA + +namespace perf { + +int profile_wmma_binary_gemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { + return 0; +} + +} // namespace perf + +#endif + +struct WmmaBinaryGemmRegistrar { + WmmaBinaryGemmRegistrar() { perf::RegisterGemmProfileFunc(perf::profile_wmma_binary_gemm); } +}; + +volatile WmmaBinaryGemmRegistrar _WmmaBinaryGemmRegistrar; + +#endif // CUTLASS_USE_WMMA_API diff --git a/tools/test/perf/gemm/wmma_gemm.cu b/tools/test/perf/gemm/wmma_gemm.cu index 022191393..825712092 100644 --- a/tools/test/perf/gemm/wmma_gemm.cu +++ b/tools/test/perf/gemm/wmma_gemm.cu @@ -23,17 +23,19 @@ * **************************************************************************************************/ -#include +#include "cutlass/wmma_matrix.h" #ifdef CUTLASS_USE_WMMA_API +#pragma warning( disable : 4503) + //////////////////////////////////////////////////////////////////////////////////////////////////// -#include - -#include -#include -#include -#include +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/wmma_gemm_traits.h" +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_profiler.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -47,9 +49,17 @@ struct WmmaGemmDispatch { /// Indicate warp-level GEMM static bool const kThreadMultiplyAdd = false; + static bool const kRunCuBLAS = true; + static cutlass::MatrixLayout::Kind const kLayoutA = Traits::kLayoutA; static cutlass::MatrixLayout::Kind const kLayoutB = Traits::kLayoutB; + typedef typename Traits::ScalarA ScalarA; + typedef typename Traits::ScalarB ScalarB; + typedef typename Traits::ScalarC ScalarC; + typedef typename Traits::ScalarD ScalarD; + typedef typename Traits::Epilogue::Functor::Scalar Scalar; + // // Data members // @@ -64,9 +74,20 @@ struct 
WmmaGemmDispatch { WmmaGemmDispatch() {} /// Initializes params object - WmmaGemmDispatch(int m, int n, int k, float alpha, half const* d_a, int lda, - half const* d_b, int ldb, float beta, float const* d_c, int ldc, - float* d_d, int ldd) { + WmmaGemmDispatch( + int m, + int n, + int k, + Scalar alpha, + ScalarA const* d_a, + int lda, + ScalarB const* d_b, + int ldb, + Scalar beta, + ScalarC const* d_c, + int ldc, + ScalarD* d_d, + int ldd) { params.initialize(m, n, k, alpha, d_a, lda, d_b, ldb, beta, d_c, ldc, d_d, ldd); } @@ -76,33 +97,6 @@ struct WmmaGemmDispatch { /// Launches kernel cudaError_t operator()() { return Gemm::launch(params); } - - /// Determines if problem is aligned (assuming no padding) - static bool is_problem_aligned( - int m, - int n, - int k) { - - bool aligned = true; - - if (kLayoutA == cutlass::MatrixLayout::kColumnMajor) { - aligned = aligned && !(m % Gemm::Traits::GemmConfig::kScalarsPerLdgA); - } - else { - aligned = aligned && !(k % Gemm::Traits::GemmConfig::kScalarsPerLdgA); - } - - if (kLayoutB == cutlass::MatrixLayout::kColumnMajor) { - aligned = aligned && !(k % Gemm::Traits::GemmConfig::kScalarsPerLdgB); - } - else { - aligned = aligned && !(n % Gemm::Traits::GemmConfig::kScalarsPerLdgB); - } - - aligned = aligned && !(m % Gemm::Traits::GemmConfig::kScalarsPerLdgC); - - return aligned; - } }; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -111,54 +105,49 @@ namespace perf { //////////////////////////////////////////////////////////////////////////////////////////////////// -int profile_wmma_gemm(TestbenchOutput &output, TestbenchOptions const &options) { - +int profile_wmma_gemm_f32(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { typedef perf::GemmProfiler GemmProfiler; int results = 0; - if (!results) { - + { typedef cutlass::gemm::WmmaGemmTraits WmmaGemmTraits; typedef WmmaGemmDispatch Dispatch; - profile_gemm(output, "wmma_gemm_nt", 
options); + results |= profile_gemm(output, "wmma_gemm_nt", options, config); } - if (!results) { - + { typedef cutlass::gemm::WmmaGemmTraits WmmaGemmTraits; typedef WmmaGemmDispatch Dispatch; - profile_gemm(output, "wmma_gemm_nn", options); + results |= profile_gemm(output, "wmma_gemm_nn", options, config); } - if (!results) { - + { typedef cutlass::gemm::WmmaGemmTraits WmmaGemmTraits; typedef WmmaGemmDispatch Dispatch; - profile_gemm(output, "wmma_gemm_tn", options); + results |= profile_gemm(output, "wmma_gemm_tn", options, config); } - if (!results) { - + { typedef cutlass::gemm::WmmaGemmTraits WmmaGemmTraits; typedef WmmaGemmDispatch Dispatch; - profile_gemm(output, "wmma_gemm_tt", options); + results |= profile_gemm(output, "wmma_gemm_tt", options, config); } return results; @@ -166,6 +155,112 @@ int profile_wmma_gemm(TestbenchOutput &output, TestbenchOptions const &options) //////////////////////////////////////////////////////////////////////////////////////////////////// +int profile_wmma_gemm_f16( + TestbenchOutput &output, + TestbenchOptions const &options, + Config const &config) { + + typedef perf::GemmProfiler< + cutlass::half_t, + cutlass::half_t, + cutlass::half_t, + cutlass::half_t, + cutlass::half_t> GemmProfiler; + + int results = 0; + + { + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::Shape<32, 128, 128>, + half, + half, + half, + cutlass::gemm::LinearScaling, + half, + cutlass::Shape<32, 64, 64> + > + WmmaGemmTraits; + + typedef WmmaGemmDispatch Dispatch; + + results |= profile_gemm(output, "wmma_gemm_f16_nt", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::Shape<32, 128, 128>, + half, + half, + half, + cutlass::gemm::LinearScaling, + half, + cutlass::Shape<32, 64, 64> + > + WmmaGemmTraits; + + typedef WmmaGemmDispatch Dispatch; + + results |= 
profile_gemm(output, "wmma_gemm_f16_nn", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::Shape<32, 128, 128>, + half, + half, + half, + cutlass::gemm::LinearScaling, + half, + cutlass::Shape<32, 64, 64> + > + WmmaGemmTraits; + + typedef WmmaGemmDispatch Dispatch; + + results |= profile_gemm(output, "wmma_gemm_f16_tn", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::Shape<32, 128, 128>, + half, + half, + half, + cutlass::gemm::LinearScaling, + half, + cutlass::Shape<32, 64, 64> + > + WmmaGemmTraits; + + typedef WmmaGemmDispatch Dispatch; + + results |= profile_gemm(output, "wmma_gemm_f16_tt", options, config); + } + + return results; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +struct WmmaGemmRegistrar { + WmmaGemmRegistrar() { + RegisterGemmProfileFunc(profile_wmma_gemm_f32); + RegisterGemmProfileFunc(profile_wmma_gemm_f16); + } +}; + +volatile WmmaGemmRegistrar _WmmaGemmRegistrar; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + } // namespace perf //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/perf/gemm/wmma_integer_gemm.cu b/tools/test/perf/gemm/wmma_integer_gemm.cu new file mode 100644 index 000000000..db615989a --- /dev/null +++ b/tools/test/perf/gemm/wmma_integer_gemm.cu @@ -0,0 +1,455 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ + +#include "tools/test/perf/cutlass_perf_test.h" +#include "tools/test/perf/gemm/gemm_perf_testbed.h" +#include "tools/test/perf/gemm/gemm_profiler.h" + +#include "cutlass/wmma_matrix.h" +#ifdef CUTLASS_USE_WMMA_API +#ifdef CUTLASS_USE_SUBBYTE_WMMA + +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/wmma_gemm_traits.h" +#include "tools/test/perf/gemm/cutlass_dispatch.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaIntegerGemmDispatch { + + typedef cutlass::gemm::Gemm Gemm; + + typedef typename Gemm::Params Params; + + /// Indicate warp-level GEMM + static bool const kThreadMultiplyAdd = false; + + static bool const kRunCuBLAS = false; + + static cutlass::MatrixLayout::Kind const kLayoutA = Traits::kLayoutA; + static cutlass::MatrixLayout::Kind const kLayoutB = Traits::kLayoutB; + + // + // Data members + // + + /// Params argument + Params params; + + // + // Methods + // + + WmmaIntegerGemmDispatch() {} + + /// Initializes params object + WmmaIntegerGemmDispatch(int m, int n, int k, int alpha, + ScalarA const* d_a, int lda, + ScalarB const* d_b, int ldb, int beta, + int const* d_c, int ldc, int* d_d, int ldd) { + + params.initialize(m, n, k, alpha, d_a, lda, d_b, ldb, beta, d_c, ldc, d_d, ldd); + } + + /// Initializes params object + WmmaIntegerGemmDispatch(Params const& _params) : params(_params) {} + + /// Launches kernel + cudaError_t operator()() { return Gemm::launch(params); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaIntegerGemmDispatch, + cutlass::Vector > { + + typedef typename cutlass::Vector ScalarA; + typedef typename cutlass::Vector ScalarB; + + typedef cutlass::gemm::Gemm Gemm; + + typedef typename Gemm::Params Params; + + /// Indicate warp-level GEMM + static bool 
const kThreadMultiplyAdd = false; + + static bool const kRunCuBLAS = false; + + static cutlass::MatrixLayout::Kind const kLayoutA = Traits::kLayoutA; + static cutlass::MatrixLayout::Kind const kLayoutB = Traits::kLayoutB; + + // + // Data members + // + + /// Params argument + Params params; + + // + // Methods + // + + WmmaIntegerGemmDispatch() {} + + /// Initializes params object + WmmaIntegerGemmDispatch(int m, int n, int k, int alpha, + ScalarA const* d_a, int lda, + ScalarB const* d_b, int ldb, int beta, + int const* d_c, int ldc, int* d_d, int ldd) { + + params.initialize(m, n, k * 8, alpha, d_a, lda, d_b, ldb, beta, d_c, ldc, d_d, ldd); + } + + /// Initializes params object + WmmaIntegerGemmDispatch(Params const& _params) : params(_params) {} + + /// Launches kernel + cudaError_t operator()() { return Gemm::launch(params); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaIntegerGemmDispatch, + cutlass::Vector > { + + typedef typename cutlass::Vector ScalarA; + typedef typename cutlass::Vector ScalarB; + + typedef cutlass::gemm::Gemm Gemm; + + typedef typename Gemm::Params Params; + + /// Indicate warp-level GEMM + static bool const kThreadMultiplyAdd = false; + + static bool const kRunCuBLAS = false; + + static cutlass::MatrixLayout::Kind const kLayoutA = Traits::kLayoutA; + static cutlass::MatrixLayout::Kind const kLayoutB = Traits::kLayoutB; + + // + // Data members + // + + /// Params argument + Params params; + + // + // Methods + // + + WmmaIntegerGemmDispatch() {} + + /// Initializes params object + WmmaIntegerGemmDispatch(int m, int n, int k, int alpha, + ScalarA const* d_a, int lda, + ScalarB const* d_b, int ldb, int beta, + int const* d_c, int ldc, int* d_d, int ldd) { + + params.initialize(m, n, k * 8, alpha, d_a, lda, d_b, ldb, beta, d_c, ldc, d_d, ldd); + } + + /// Initializes params object + WmmaIntegerGemmDispatch(Params const& _params) : params(_params) {} + + 
/// Launches kernel + cudaError_t operator()() { return Gemm::launch(params); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace perf { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +int profile_wmma_integer_gemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { + + int results = 0; + + // compute capability check + if (!options.compute_capability(7, 5)) { + return 0; + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_s8_16x16x16_nn", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_s8_16x16x16_nt", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_s8_16x16x16_tn", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef 
perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_s8_16x16x16_tt", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_u8_16x16x16_nn", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_u8_16x16x16_nt", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_u8_16x16x16_tn", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<128, 32, 32>, + cutlass::Shape<16, 16, 16>, + 16, + 16> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch Dispatch; + + typedef perf::GemmProfiler GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_u8_16x16x16_tt", options, config); + } + + // compute capability check + if (!options.compute_capability_exact(7, 5)) { + return 0; + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + 
cutlass::Shape<32, 8, 8>, + 32, + 32> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch, + cutlass::Vector > Dispatch; + + typedef perf::GemmProfiler, + cutlass::Vector, + int, + int, + int> GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_s4_tn", options, config); + } + + { + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + cutlass::Shape<32, 8, 8>, + 32, + 32> WmmaGemmTraits; + + typedef WmmaIntegerGemmDispatch, + cutlass::Vector > Dispatch; + + typedef perf::GemmProfiler, + cutlass::Vector, + int, + int, + int> GemmProfiler; + + results |= profile_gemm(output, "wmma_integer_gemm_u4_tn", options, config); + } + + return results; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace perf + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#else // ! 
CUTLASS_USE_SUBBYTE_WMMA + +namespace perf { + +int profile_wmma_integer_gemm(TestbenchOutput &output, TestbenchOptions const &options, Config const &config) { + return 0; +} + +} + +#endif + +struct WmmaIntegerGemmRegistrar { + WmmaIntegerGemmRegistrar() { perf::RegisterGemmProfileFunc(perf::profile_wmma_integer_gemm); } +}; + +volatile WmmaIntegerGemmRegistrar _WmmaIntegerGemmRegistrar; + +#endif // ifdef CUTLASS_USE_WMMA_API diff --git a/tools/test/perf/performance_result.h b/tools/test/perf/performance_result.h index 65ec4b18e..38674b76a 100644 --- a/tools/test/perf/performance_result.h +++ b/tools/test/perf/performance_result.h @@ -25,25 +25,39 @@ #pragma once -#include -#include +#include "cutlass/matrix_traits.h" +#include "tools/util/command_line.h" +#include "tools/test/perf/provider.h" //////////////////////////////////////////////////////////////////////////////////////////////////// namespace perf { +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// Outcome of test struct Disposition { - enum Kind { Unknown = 0, NotRun, Passed, Incorrect, Failed, NotVerified, Invalid }; + enum Kind { + Unknown = 0, + NotRun, + Passed, + Incorrect, + Failed, + NotVerified, + Invalid + }; }; //////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace perf - -inline std::ostream &operator<<(std::ostream &out, perf::Disposition::Kind value) { - char const *str[] = { - "unknown", "not_run", "passed", "incorrect", "failed", "not_verified", "invalid"}; +inline std::ostream &operator<<(std::ostream &out, Disposition::Kind value) { + char const *str[] = {"unknown", + "not_run", + "passed", + "incorrect", + "failed", + "not_verified", + "invalid"}; if (value >= perf::Disposition::Unknown && value < perf::Disposition::Invalid) { out << str[value]; } else { @@ -62,10 +76,6 @@ inline std::ostream &operator<<(std::ostream &out, cutlass::MatrixLayout::Kind l 
//////////////////////////////////////////////////////////////////////////////////////////////////// -namespace perf { - -//////////////////////////////////////////////////////////////////////////////////////////////////// - /// Size and layout of a GEMM problem struct GemmProblem { // @@ -86,7 +96,7 @@ struct GemmProblem { // /// Static method to print GemmProblem headers - static std::string header() { return "M, N, K, Layout_A, Layout_B, Beta"; } + static std::string header() { return "M,N,K,Layout_A,Layout_B,Beta"; } // // Methods @@ -129,34 +139,27 @@ struct GemmProblem { } }; -//////////////////////////////////////////////////////////////////////////////////////////////////// - -} // namespace perf - -//////////////////////////////////////////////////////////////////////////////////////////////////// - /// Prints a problem to an output stream -inline std::ostream &operator<<(std::ostream &out, perf::GemmProblem const &problem) { - out << problem.m << ", " << problem.n << ", " << problem.k << ", " << problem.layout_A << ", " - << problem.layout_B << ", " << problem.beta; +inline std::ostream &operator<<(std::ostream &out, GemmProblem const &problem) { + out << problem.m << "," << problem.n << "," << problem.k << "," << problem.layout_A << "," + << problem.layout_B << "," << problem.beta; return out; } //////////////////////////////////////////////////////////////////////////////////////////////////// -namespace perf { - -//////////////////////////////////////////////////////////////////////////////////////////////////// - /// Result object +template struct PerformanceResult { + /// Provider of GEMM implementation + Provider::Kind provider; /// Name of kernel std::string kernel_name; /// Problem size - GemmProblem problem; + Problem problem; /// Outcome of test Disposition::Kind disposition; @@ -166,40 +169,45 @@ struct PerformanceResult { /// Throughput in units of GFLOPs double gflops; + // // Methods // - PerformanceResult( - std::string const &_kernel_name 
= "", - GemmProblem const &_problem = GemmProblem(), - Disposition::Kind _disposition = Disposition::NotRun, - double _runtime = 0, - double _gflops = 0) - : - kernel_name(_kernel_name), - problem(_problem), - disposition(_disposition), - runtime(_runtime), - gflops(_gflops) {} + PerformanceResult(Provider::Kind _provider = Provider::Cutlass + , std::string const &_kernel_name = "" + , Problem const &_problem = Problem() + , Disposition::Kind _disposition = Disposition::NotRun + , double _runtime = 0 + , double _gflops = 0 + ): + provider(_provider) + , kernel_name(_kernel_name) + , problem(_problem) + , disposition(_disposition) + , runtime(_runtime) + , gflops(_gflops) + {} /// Displays headers static std::string header() { - return std::string("Kernel, ") + GemmProblem::header() + - ", Disposition, Runtime, GFLOPs"; + std::stringstream ss; + + ss << "Provider,Kernel," << Problem::header(); + ss << ",Disposition,Runtime,GFLOPs"; + return ss.str(); } /// Prints human-readable results std::ostream &pretty_print(std::ostream &out) const { - out << "Kernel: \033[1m" << kernel_name << "\033[0m\n" + << " provider: " << provider << "\n" << " problem: "; std::stringstream disposition_str; if (disposition == Disposition::Passed) { disposition_str << "\033[1m"; - } - else { + } else { disposition_str << "\033[1;31m"; } disposition_str << disposition << "\033[0m"; @@ -215,15 +223,16 @@ struct PerformanceResult { //////////////////////////////////////////////////////////////////////////////////////////////////// -} // namespace perf - /// Outputs result -inline std::ostream &operator<<(std::ostream &out, perf::PerformanceResult const &result) { +template +inline std::ostream &operator<<(std::ostream &out, PerformanceResult const &result) { - out << result.kernel_name << ", " << result.problem << ", " - << result.disposition << ", " << result.runtime << ", " << result.gflops; + out << result.provider << "," << result.kernel_name << "," << result.problem << "," + << 
result.disposition << "," << result.runtime << "," << result.gflops; return out; } //////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace perf diff --git a/tools/test/perf/provider.h b/tools/test/perf/provider.h new file mode 100644 index 000000000..544ee3fbb --- /dev/null +++ b/tools/test/perf/provider.h @@ -0,0 +1,71 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#pragma once + +#include + +namespace perf { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Implementation under test +struct Provider { + enum Kind { + Unknown = 0, + Cutlass, + Invalid + }; + + static Provider::Kind from_string(std::string const &str) { + if (str == "cutlass" || str == "Cutlass") { + return Cutlass; + } + else { + return Invalid; + } + } +}; + +/// Prints provider +inline std::ostream &operator<<(std::ostream &out, Provider::Kind provider) { + char const *str[] = { + "unknown", + "Cutlass", + "invalid" + }; + if (provider >= perf::Provider::Unknown && provider < perf::Provider::Invalid) { + out << str[provider]; + } else { + out << str[perf::Provider::Invalid]; + } + return out; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace perf + + diff --git a/tools/test/perf/testbench_configs.h b/tools/test/perf/testbench_configs.h new file mode 100644 index 000000000..a7036aba8 --- /dev/null +++ b/tools/test/perf/testbench_configs.h @@ -0,0 +1,189 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ + +#pragma once + +#include +#include +#include +#include + +#include "tools/test/perf/testbench_options.h" + +namespace perf { + +// Structure of configurations to run +struct Config { + // Scalar value for GEMM + double alpha; + + /// Scalar value for GEMM + double beta; + + // kernel to run + std::vector kernels; + + /// Range of problem sizes + GemmProblemRange problem_range; + + // Reference GFLOPs + double gflops_ref; + + // Reference Runtime + double runtime_ref; + + // Reference Peak Throughput + double peak_throughput_ref; + + // Returns true if the kernel name appears among the enabled kernels + bool kernel_enabled(std::string const &kernel) const { + typedef std::vector::const_iterator kernel_iterator; + + for (kernel_iterator it = kernels.begin(); it != kernels.end(); ++it) { + if (kernel.find(*it) != std::string::npos) { + return true; + } + } + + return false; + } +}; + +// Class to set the configurations to run +struct TestbenchConfigs { + // + // Data members + // + + // Vector of configurations to run + std::vector configs; + + // Options to test environment + TestbenchOptions options; + + // Input CSV file to read (if applicable) + std::ifstream threshold_file; + + // + // Methods + // + + // Determines the configurations to run from the threshold file + void configs_from_file() { + // Set the values of kernels, M, N, K and beta based off of values read from CSVs + threshold_file.open(options.threshold_filename.c_str()); + if (threshold_file.is_open()) { + std::string line; + int provider_idx = -1; + int kernel_idx = -1; + int beta_idx = -1; + int m_idx = -1; + int n_idx = -1; + int k_idx = -1; + int gflops_idx = -1; + int runtime_idx = -1; + int peak_throughput_idx = -1; + + // Read the header and get the indices of the columns + if (getline(threshold_file, line)) { + char delim = ','; + size_t s_idx = 0; + size_t d_idx = std::string::npos; + 
int idx = 0; + line.erase(std::remove(line.begin(), line.end(), ' '), line.end()); + while (s_idx < line.size()) { + d_idx = line.find_first_of(delim, s_idx); + size_t end_idx = (d_idx != std::string::npos ? d_idx : line.size()); + std::string item = line.substr(s_idx, end_idx - s_idx); + if (item.compare("Provider") == 0) provider_idx = idx; + if (item.compare("Kernel") == 0) kernel_idx = idx; + if (item.compare("Beta") == 0) beta_idx = idx; + if (item.compare("M") == 0) m_idx = idx; + if (item.compare("N") == 0) n_idx = idx; + if (item.compare("K") == 0) k_idx = idx; + if (item.compare("GFLOPs") == 0) gflops_idx = idx; + if (item.compare("Runtime") == 0) runtime_idx = idx; + if (item.compare("SOL") == 0) peak_throughput_idx = idx; + s_idx = end_idx + 1; // For comma + idx++; + } + } + + while (getline(threshold_file, line)) { + char delim = ','; + size_t s_idx = 0; + size_t d_idx = std::string::npos; + std::vector tokens; + line.erase(std::remove(line.begin(), line.end(), ' '), line.end()); + while (s_idx < line.size()) { + d_idx = line.find_first_of(delim, s_idx); + size_t end_idx = (d_idx != std::string::npos ? 
d_idx : line.size()); + std::string item = line.substr(s_idx, end_idx - s_idx); + tokens.push_back(item); + s_idx = end_idx + 1; // For comma + } + if (tokens[provider_idx].compare("Cutlass") == 0) { + // Create a new config + Config config = Config(); + config.alpha = options.alpha; + config.beta = strtod(tokens[beta_idx].c_str(), NULL); + config.kernels.push_back(tokens[kernel_idx]); + config.problem_range.M = Range((int)strtol(tokens[m_idx].c_str(), NULL, 10)); + config.problem_range.N = Range((int)strtol(tokens[n_idx].c_str(), NULL, 10)); + config.problem_range.K = Range((int)strtol(tokens[k_idx].c_str(), NULL, 10)); + config.gflops_ref = strtod(tokens[gflops_idx].c_str(), NULL); + config.runtime_ref = strtod(tokens[runtime_idx].c_str(), NULL); + config.peak_throughput_ref = strtod(tokens[peak_throughput_idx].c_str(), NULL); + configs.push_back(config); + } + } + } else { // !threshold_file.is_open() + std::cout << "ERROR: Could not open threshold file " << options.threshold_filename << "\n"; + } + } + + // Determines the configurations to run from the command line arguments + void configs_from_args() { + Config config = Config(); + config.alpha = options.alpha; + config.beta = options.beta; + for (int i = 0; i < options.kernels.size(); i++) { + config.kernels.push_back(options.kernels[i]); + } + config.problem_range = options.problem_range; + configs.push_back(config); + } + + // Constructor + TestbenchConfigs(TestbenchOptions const &_options) : options(_options) { + if (!options.threshold_filename.empty()) { + configs_from_file(); + } else { + configs_from_args(); + } + } +}; + +} // namespace perf diff --git a/tools/test/perf/testbench_options.h b/tools/test/perf/testbench_options.h index dd2676294..25dff1526 100644 --- a/tools/test/perf/testbench_options.h +++ b/tools/test/perf/testbench_options.h @@ -25,8 +25,16 @@ #pragma once +#include +#include + #include -#include +#include + +#include "cutlass/cutlass.h" +#include "tools/util/command_line.h" +#include 
"tools/util/distribution.h" +#include "tools/test/perf/provider.h" namespace perf { @@ -34,14 +42,73 @@ namespace perf { /// Range of problem sizes struct Range { + + enum Operator { + Add, + Multiply + }; + + // + // Data members + // + int start; int end; int increment; + Operator increment_op; - Range(int _start = 0) : start(_start), end(_start), increment(1) {} + // + // Methods + // - Range(int _start, int _end, int _increment = 1) - : start(_start), end(_end), increment(_increment) {} + Range(int _start = 0) : start(_start), end(_start), increment(1), increment_op(Add) {} + + Range(int _start, int _end, int _increment = 1, Operator _op = Add) + : start(_start), end(_end), increment(_increment), increment_op(_op) {} + + /// Returns the next item in series + int next(int val) const { + switch (increment_op) { + case Add: val += increment; break; + case Multiply: val *= increment; break; + default: val = end; break; + } + return val; + } + + void import_from_strings(const std::vector& values) { + if (values.size() > 0) { + std::stringstream ss; + ss << values.at(0); + ss >> start; + } + + if (values.size() > 1) { + std::stringstream ss; + ss << values.at(1); + ss >> end; + } else { + end = start; + } + + if (values.size() > 2 && !values.at(2).empty()) { + std::stringstream ss; + + char first = values.at(2).at(0); + if (first == '*' || first == '+') { + ss << values.at(2).substr(1); + switch (first) { + case '*': increment_op = Multiply; break; + case '+': increment_op = Add; break; + default: break; + } + } + else { + ss << values.at(2); + } + ss >> increment; + } + } }; /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -77,25 +144,7 @@ struct GemmProblemRange { std::vector values; args.get_cmd_line_arguments(arg.c_str(), values, ':'); - if (values.size() > 0) { - std::stringstream ss; - ss << values.at(0); - ss >> range.start; - } - - if (values.size() > 1) { - std::stringstream ss; - ss << values.at(1); - ss 
>> range.end; - } else { - range.end = range.start; - } - - if (values.size() > 2) { - std::stringstream ss; - ss << values.at(2); - ss >> range.increment; - } + range.import_from_strings(values); } else { range = _default; } @@ -111,105 +160,6 @@ struct GemmProblemRange { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Distribution type -struct Distribution { - /// Variant types - enum Kind { Invalid, Uniform, Gaussian, Linear, Identity }; - - /// Distribution state - union { - /// Uniform distribution - struct { - double min; - double max; - } uniform; - - /// Gaussian distribution - struct { - double mean; - double stddev; - } gaussian; - - /// Elements are linear combination of row and column index - struct { - double offset; - double delta_row; - double delta_column; - } linear; - }; - - /// Active variant kind - Kind kind; - - /// Random values are cast to integer after scaling by this power of two - int int_scale; - - // - // Methods - // - - Distribution() : kind(Invalid), int_scale(0) {} - - /// Configures distribution as uniform random - Distribution &set_uniform(double _min, double _max, int _int_scale = 0) { - kind = Uniform; - uniform.min = _min; - uniform.max = _max; - int_scale = _int_scale; - return *this; - } - - /// Configures distribution as Gaussian distribution - Distribution &set_gaussian(double _mean, double _stddev, int _int_scale = 0) { - kind = Gaussian; - gaussian.mean = _mean; - gaussian.stddev = _stddev; - int_scale = _int_scale; - return *this; - } - - - /// Sets identity - Distribution &set_identity() { - kind = Identity; - return *this; - } -}; - -} // namespace perf - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/// Prints a Distribution to ostream -inline std::ostream &operator<<(std::ostream &out, perf::Distribution const &dist) { - switch (dist.kind) { - case perf::Distribution::Uniform: - out << "uniorm, min: " 
<< dist.uniform.min << ", max: " << dist.uniform.max; - break; - case perf::Distribution::Gaussian: - out << "gaussian, mean: " << dist.gaussian.mean << ", stddev: " << dist.gaussian.stddev; - break; - case perf::Distribution::Linear: - out << "linear, mean: " << dist.linear.offset << ", delta_row: " << dist.linear.delta_row - << ", delta_column: " << dist.linear.delta_column; - break; - case perf::Distribution::Identity: - break; - default: - out << "unknown"; - } - - out << ", int_scale: " << dist.int_scale; - - return out; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace perf { - -//////////////////////////////////////////////////////////////////////////////////////////////////// - /// Defines a vector of string pairs typedef std::vector > KeyValueVector; @@ -219,13 +169,13 @@ typedef KeyValueVector::const_iterator KeyValueIterator; /// Structure captures the initial configuration of matrices struct InitialDistribution { /// Distribution of A matrix operand - Distribution dist_A; + cutlass::Distribution dist_A; /// Distribution of B matrix operand - Distribution dist_B; + cutlass::Distribution dist_B; - /// Distribution of C matrix operand - Distribution dist_C; + /// cutlass::Distribution of C matrix operand + cutlass::Distribution dist_C; /// Seed for random number generation int64_t seed; @@ -237,15 +187,15 @@ struct InitialDistribution { /// Gets the initial distribution static void get_distribution(cutlass::CommandLine const &args, std::string const &arg, - Distribution &dist) { + cutlass::Distribution &dist) { struct { const char *label; - Distribution::Kind kind; - } distribution_kinds[] = {{"uniform", Distribution::Uniform}, - {"gaussian", Distribution::Gaussian}, - {"linear", Distribution::Linear}, - {"identity", Distribution::Identity}, - {0, Distribution::Invalid}}; + cutlass::Distribution::Kind kind; + } distribution_kinds[] = {{"uniform", cutlass::Distribution::Uniform}, + 
{"gaussian", cutlass::Distribution::Gaussian}, + {"linear", cutlass::Distribution::Linear}, + {"identity", cutlass::Distribution::Identity}, + {0, cutlass::Distribution::Invalid}}; struct { char const *label; @@ -276,13 +226,17 @@ struct InitialDistribution { // Subsequent key-value pairs update the named field of the distribution struct. for (; it != values.end(); ++it) { - // Integer scaling factor - if < 0, no integer rounding is performed. if (it->first == "scale" && !it->second.empty()) { std::stringstream ss; ss << it->second; ss >> dist.int_scale; + continue; // next token + } + // Casts as integer without scaling + if (it->first == "integer") { + dist.int_scale = 0; continue; // next token } @@ -326,12 +280,12 @@ struct InitialDistribution { args.get_cmd_line_argument("seed", seed, seed); // Update all distributions at once - Distribution dist_all; + cutlass::Distribution dist_all; if (args.check_cmd_line_flag("dist")) { - get_distribution(args, "dist", dist_all); - dist_A = dist_all; - dist_B = dist_all; - dist_C = dist_all; + get_distribution(args, "dist", dist_all); + dist_A = dist_all; + dist_B = dist_all; + dist_C = dist_all; } get_distribution(args, "dist_A", dist_A); @@ -344,19 +298,18 @@ struct InitialDistribution { /// Defines how to execute the benchmarks struct ExecutionMode { - enum Kind { - Profile, - Verify, - Single, - Invalid - }; + enum Kind { Profile, Verify, Single, Invalid }; static std::string to_string(Kind kind) { switch (kind) { - case Profile: return "profile"; - case Verify: return "verify"; - case Single: return "single"; - default: return "invalid"; + case Profile: + return "profile"; + case Verify: + return "verify"; + case Single: + return "single"; + default: + return "invalid"; } } @@ -370,18 +323,18 @@ struct ExecutionMode { /// Indicates when the workspace is saved struct WorkspaceSaveMode { - enum Kind { - Never, - Incorrect, - Always - }; + enum Kind { Never, Incorrect, Always }; static std::string to_string(Kind kind) { 
switch (kind) { - case Never: return "never"; - case Incorrect: return "incorrect"; - case Always: return "always"; - default: return "incorrect"; + case Never: + return "never"; + case Incorrect: + return "incorrect"; + case Always: + return "always"; + default: + return "incorrect"; } } @@ -397,7 +350,6 @@ struct WorkspaceSaveMode { /// Class holding testbench command line options struct TestbenchOptions { - // // Data members // @@ -408,18 +360,24 @@ struct TestbenchOptions { // Path to output file name std::string output_filename; + // Path to input file name + std::string threshold_filename; + /// If true, output is appended bool append; /// Number of iterations int iterations; - + /// Defines how to run the benchmark ExecutionMode::Kind execution_mode; /// Indicates when the workspace is saved WorkspaceSaveMode::Kind save_workspace_mode; + /// Properties of CUDA device + cudaDeviceProp device_properties; + /// Enabled kernel names std::vector kernels; @@ -432,12 +390,21 @@ struct TestbenchOptions { /// Range of problem sizes GemmProblemRange problem_range; + /// If true, kernels are not executed, and no sleep waits are inserted + bool dry_run; + /// Tags to describe the profiler output KeyValueVector pivot_tags; /// If enabled, only the peak performance for a given kernel is reported bool peak_performance; + /// Performance Degradatiom Margin before flagging as test failure + double perf_margin; + + /// Cool-down period + int sleep_time; + // // Methods // @@ -447,26 +414,47 @@ struct TestbenchOptions { : initial_distribution(args), execution_mode(ExecutionMode::Profile), save_workspace_mode(WorkspaceSaveMode::Never), - problem_range(args) { + problem_range(args), + dry_run(false), + sleep_time(1) { + + // Set the CUDA device and/or specify clock rate + configure_cuda_device(args); // fetch command line arguments args.get_cmd_line_argument("iterations", iterations, 25); args.get_cmd_line_argument("append", append, false); args.get_cmd_line_argument("output", 
output_filename); + args.get_cmd_line_argument("threshold", threshold_filename); args.get_cmd_line_argument("alpha", alpha, 1.0); args.get_cmd_line_argument("beta", beta, 0.0); args.get_cmd_line_argument("peak", peak_performance, false); args.get_cmd_line_argument_pairs("tags", pivot_tags); + args.get_cmd_line_argument("perf-margin", perf_margin, 0.97); + args.get_cmd_line_argument("dry-run", dry_run, false); + args.get_cmd_line_argument("sleep-time", sleep_time, 1); - if (args.check_cmd_line_flag("execution_mode")) { + if (args.check_cmd_line_flag("execution-mode")) { std::string str; - args.get_cmd_line_argument("execution_mode", str); + args.get_cmd_line_argument("execution-mode", str); execution_mode = ExecutionMode::from_string(str); } - if (args.check_cmd_line_flag("save_workspace")) { + if (args.check_cmd_line_flag("save-workspace")) { std::string str; - args.get_cmd_line_argument("save_workspace", str); + args.get_cmd_line_argument("save-workspace", str); + save_workspace_mode = WorkspaceSaveMode::from_string(str); + } + + if (args.check_cmd_line_flag("execution-mode")) { + std::string str; + args.get_cmd_line_argument("execution-mode", str); + execution_mode = ExecutionMode::from_string(str); + } + + if (args.check_cmd_line_flag("save-workspace")) { + std::string str; + args.get_cmd_line_argument("save-workspace", str); save_workspace_mode = WorkspaceSaveMode::from_string(str); } @@ -474,13 +462,50 @@ struct TestbenchOptions { if (args.check_cmd_line_flag("kernels")) { args.get_cmd_line_arguments("kernels", kernels, ','); } else { - char const *gemms[] = {"sgemm", "dgemm", "hgemm", "igemm", "wmma_gemm", 0}; + char const *gemms[] = { + "sgemm", + "dgemm", + "hgemm", + "igemm", + "wmma_gemm", + "wmma_gemm_f16", + "wmma_binary_gemm", + "wmma_integer_gemm", + 0 + }; char const *layouts[] = {"nn", "nt", "tn", "tt", 0}; for (int i = 0; gemms[i]; ++i) { for (int j = 0; layouts[j]; ++j) { + if ((std::string(gemms[i]).compare("wmma_binary_gemm") == 0 || + 
std::string(gemms[i]).compare("wmma_integer_gemm") == 0) + && std::string(layouts[j]).compare("tn") != 0) { + continue; + } kernels.push_back(std::string(gemms[i]) + "_" + layouts[j]); } } + + } + } + + void configure_cuda_device(cutlass::CommandLine const &args) { + int device_id = 0; + args.get_cmd_line_argument("device", device_id, 0); + + cudaError_t result; + result = cudaGetDeviceProperties(&device_properties, device_id); + if (result != cudaSuccess) { + throw std::runtime_error("cudaGetDeviceProperties() failed for given device."); + } + result = cudaSetDevice(device_id); + if (result != cudaSuccess) { + throw std::runtime_error("cudaSetDevice() failed for given device."); + } + + // Get the clock rate (specified in cmd line in MHz) + if (args.check_cmd_line_flag("clock")) { + args.get_cmd_line_argument("clock", device_properties.clockRate); + device_properties.clockRate *= 1000; } } @@ -501,15 +526,31 @@ struct TestbenchOptions { /// be saved to the file system. bool save_workspace(bool correct) const { if (save_workspace_mode == WorkspaceSaveMode::Always || - (save_workspace_mode == WorkspaceSaveMode::Incorrect && !correct)) { + (save_workspace_mode == WorkspaceSaveMode::Incorrect && !correct)) { return true; } return false; } + /// Returns true if the selected device can satisfy the given compute capability + bool compute_capability(int major, int minor) const { + return (device_properties.major > major || + (device_properties.major == major && device_properties.minor >= minor)); + } + + /// Requires an exact match of compute capability + bool compute_capability_exact(int major, int minor) const { + return major == device_properties.major && minor == device_properties.minor; + } + + /// Prints version + static void version(std::ostream &out) { + out << "CUTLASS " << CUTLASS_MAJOR << "." << CUTLASS_MINOR << "." 
<< CUTLASS_PATCH + << " built on " << __DATE__ << " at " << __TIME__; + } + /// Prints the usage statement static void usage(std::ostream &out) { - out << "cutlass_perf_test [options]\n\n" << " --help\n" @@ -523,15 +564,27 @@ struct TestbenchOptions { << " --beta= " << " Value for beta to be used in GEMM experiments\n" - << " --dist_{A,B,C}= " + << " --device= " + << " Specifies the CUDA device to use. Default is device 0.\n" + + << " --clock= " + << " Specifies the SM clock rate in MHz.\n" + + << " --dist-{A,B,C}= " << " Describes the random distribution of each of the input matrix operands.\n" - << " --execution_mode= " + << " --dry-run= " + << " If true, kernels are not executed and sleep is not inserted.\n" + + << " --execution-mode= " << " Specifies execution mode: profile, verify, single\n" << " --output= " << " Writes summary of profiling to specified .csv file\n" + << " --threshold= " + << " Reads previous output summary and re-executes the same configurations.\n" + << " --iterations= " << " maximum number of iterations to execute when profiling\n" @@ -546,14 +599,19 @@ struct TestbenchOptions { << " --k=[:max depth[:step]] " << " Size of inner dimension of A and B. May specify a range with optional step size.\n" - << " --kernels={s|d|h|i|wmma_}gemm_{nn,nt,tn,tt} " + << " --kernels=<{s|d|h|i|wmma_|wmma_binary_|wmma_integer_}gemm_{nn,nt,tn,tt}>\n" + << " " << " Select GEMM datatype and layout to use for tests\n" << " --peak= " << " If true, only reports peak performance per kernel after profiling specified " "problem space.\n" - << " --save_workspace={*never,incorrect,always} " + << " --perf-margin= " + << " Allowable performance degradation before flagging test as failure (e.g. 
3% slowdown" + " = 0.97).\n" + + << " --save-workspace={*never,incorrect,always} " << " Specifies when to save the GEMM inputs and results to the filesystem.\n" << " --seed= " @@ -563,8 +621,17 @@ struct TestbenchOptions { << " Inserts leading columns in output table and uniform values for each column. Useful " "for generating pivot tables.\n" - << "\n\n" + << " --sleep-time= " + << " Sleep period between profiling kernels to cool down the device.\n" + << " --version " + << " "; + + version(out); + + out << "\n\n"; + + out << "\n\n" << "Example usage:\n\n" << "# Runs one problem size for all kernels\n" diff --git a/tools/test/perf/testbench_output.h b/tools/test/perf/testbench_output.h index 5c0cb28b3..297f02f89 100644 --- a/tools/test/perf/testbench_output.h +++ b/tools/test/perf/testbench_output.h @@ -27,15 +27,16 @@ #include -#include -#include -#include +#include "tools/test/perf/performance_result.h" +#include "tools/test/perf/testbench_options.h" +#include "tools/util/command_line.h" namespace perf { //////////////////////////////////////////////////////////////////////////////////////////////////// /// Wraps an output stream and constructs a comma-separated value table of results +template class TestbenchOutput { public: /// Options to test environment @@ -51,7 +52,7 @@ class TestbenchOutput { bool buffer_csv_output; /// Vector holding performance results - std::vector buffered_perf_results; + std::vector > buffered_perf_results; private: /// Opens the output file and updates output_ptr @@ -74,11 +75,11 @@ class TestbenchOutput { // pivot tags for (KeyValueIterator tag_it = options.pivot_tags.begin(); tag_it != options.pivot_tags.end(); ++tag_it) { - ss << tag_it->first << ", "; + ss << tag_it->first << ","; } // performance result header - ss << PerformanceResult::header(); + ss << PerformanceResult::header(); return ss.str(); } @@ -95,14 +96,23 @@ class TestbenchOutput { /// Writes output to CSV ~TestbenchOutput() { - std::cout << std::endl; - if 
(buffer_csv_output) { - out() << "\n\n" << header() << std::endl; - for (std::vector::const_iterator it = buffered_perf_results.begin(); - it != buffered_perf_results.end(); - ++it) { - write_csv(*it); + if (buffered_perf_results.size() != 0) { + std::cout << std::endl; + if (buffer_csv_output) { + out() << "\n\n" << header() << std::endl; + for (typename std::vector >::const_iterator it = + buffered_perf_results.begin(); + it != buffered_perf_results.end(); + ++it) { + write_csv(*it); + } } + std::cout << "\n[\033[1;32mPASSED\033[0m]"; + if (!options.threshold_filename.empty()) { + std::cout << " - Performance Test Successful" << std::endl; + } else { + std::cout << std::endl; + } } } @@ -122,11 +132,11 @@ class TestbenchOutput { } /// Writes a performance result to CSV output - TestbenchOutput &write_csv(PerformanceResult const &result) { + TestbenchOutput &write_csv(PerformanceResult const &result) { // pivot tags for (KeyValueIterator tag_it = options.pivot_tags.begin(); tag_it != options.pivot_tags.end(); ++tag_it) { - out() << tag_it->second << ", "; + out() << tag_it->second << ","; } out() << result << std::endl; @@ -134,24 +144,26 @@ class TestbenchOutput { } /// Prints the output without appending it for CSV writing - TestbenchOutput &pretty_print(PerformanceResult const &result) { + TestbenchOutput &pretty_print(PerformanceResult const &result) { result.pretty_print(std::cout) << std::endl; return *this; } /// Emits the result as output - TestbenchOutput &append(PerformanceResult const &result) { + TestbenchOutput &append(PerformanceResult const &result) { if (buffer_csv_output) { buffered_perf_results.push_back(result); } else { write_csv(result); + buffered_perf_results.push_back(result); } pretty_print(result); return *this; } + }; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/CMakeLists.txt b/tools/test/unit/CMakeLists.txt index 93d0290ec..36f92ecf5 100644 --- 
a/tools/test/unit/CMakeLists.txt +++ b/tools/test/unit/CMakeLists.txt @@ -37,14 +37,23 @@ link_libraries( set(CUTLASS_UNIT_TEST_HEADERS cutlass_unit_test.h core/layout_verification.h + gemm/run_gemm.h + gemm/gemm_testbed.h ) set(CUTLASS_UNIT_TEST_SOURCES cutlass_unit_test.cpp + core/tensor_ref.cu + core/tensor_view.cu + util/unique_ptr.cu util/host_tensor.cu + util/complex.cu + util/tensor_foreach.cu + util/tensor_elementwise.cu core/layout_verification.cu core/predicate_vector.cu core/tile_iterator.cu + core/zip_tile_iterator.cu gemm/dgemm.cu gemm/hgemm_128x128x8.cu gemm/hgemm_128x128x16.cu @@ -68,7 +77,19 @@ set(CUTLASS_UNIT_TEST_SOURCES gemm/sgemm_64x64x16.cu gemm/sgemm_64x32x8.cu gemm/sgemm_64x32x16.cu + gemm/fp16_sgemm_fp32_128x128x16.cu + gemm/fp16_sgemm_fp16_128x128x16.cu gemm/wmma_gemm.cu + gemm/wmma_binary_gemm.cu + gemm/wmma_integer_gemm.cu + gemm/sgemm_threadblock_swizzle_nn.cu + gemm/sgemm_threadblock_swizzle_nt.cu + gemm/sgemm_threadblock_swizzle_tn.cu + gemm/sgemm_threadblock_swizzle_tt.cu + gemm/batched_strided_sgemm_128x128x8.cu + gemm/batched_strided_dgemm_128x128x8.cu + gemm/batched_strided_hgemm_128x128x8.cu + gemm/epilogue_functor.cu ) if (CUTLASS_NVRTC_ENABLE) @@ -101,4 +122,6 @@ if (CUTLASS_NVRTC_ENABLE) endif() endif() -CUDA_ADD_CUBLAS_TO_TARGET(cutlass_unit_test) +target_link_libraries(cutlass_unit_test ${CUBLAS_LIBRARY}) + + diff --git a/tools/test/unit/core/layout_verification.cu b/tools/test/unit/core/layout_verification.cu index 523967c3d..c043ced09 100644 --- a/tools/test/unit/core/layout_verification.cu +++ b/tools/test/unit/core/layout_verification.cu @@ -22,9 +22,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include +#include "cutlass_unit_test.h" #include -#include +#include "tools/test/unit/core/layout_verification.h" namespace test { diff --git a/tools/test/unit/core/layout_verification.h b/tools/test/unit/core/layout_verification.h index 42306edb0..a0716131d 100644 --- a/tools/test/unit/core/layout_verification.h +++ b/tools/test/unit/core/layout_verification.h @@ -29,12 +29,12 @@ #include #include -#include +#include "cutlass/tensor_view.h" -#include -#include -#include -#include +#include "tools/util/half.h" +#include "tools/util/host_tensor_view.h" +#include "tools/util/tensor_view_io.h" +#include "tools/util/type_traits.h" namespace test { @@ -275,6 +275,8 @@ class VerifyLayout { for (int index = 0; index < count; ++index) { SourceBits element = hash(layout(index)); + // std::cout << " " << index << ": 0x" << std::hex << element << std::dec << std::endl; + data[index] = element; } } diff --git a/tools/test/unit/core/predicate_vector.cu b/tools/test/unit/core/predicate_vector.cu index d873cc3cf..6e0b00ced 100644 --- a/tools/test/unit/core/predicate_vector.cu +++ b/tools/test/unit/core/predicate_vector.cu @@ -26,9 +26,9 @@ #include #include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/predicate_vector.h" +#include "tools/util/host_tensor.h" namespace test { @@ -118,3 +118,126 @@ TEST(PredicateVector, Basic) { } } } + +TEST(PredicateVector, Count) { + + { + typedef cutlass::PredicateVector<4, 8> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<4, 8> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<4, 4> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<4, 4> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<4, 2> PredicateVector; + 
EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<4, 2> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<4, 1> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<4, 1> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<8, 8> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<8, 8> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<8, 4> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<8, 4> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<8, 2> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<8, 2> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<8, 1> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 2) + << "PredicateVector<8, 1> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<16, 8> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<16, 8> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<16, 4> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<16, 4> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<16, 2> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 2) + << "PredicateVector<16, 2> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<16, 1> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 4) + << "PredicateVector<16, 1> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<32, 8> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 1) + << "PredicateVector<32, 8> word 
count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<32, 4> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 2) + << "PredicateVector<32, 4> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<32, 2> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 4) + << "PredicateVector<32, 2> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<32, 1> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 8) + << "PredicateVector<32, 1> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<64, 8> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 2) + << "PredicateVector<64, 8> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<64, 4> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 4) + << "PredicateVector<64, 4> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<64, 2> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 8) + << "PredicateVector<64, 2> word count: " << PredicateVector::kWordCount; + } + + { + typedef cutlass::PredicateVector<64, 1> PredicateVector; + EXPECT_EQ(int(PredicateVector::kWordCount), 16) + << "PredicateVector<64, 1> word count: " << PredicateVector::kWordCount; + } +} diff --git a/tools/test/unit/core/tensor_ref.cu b/tools/test/unit/core/tensor_ref.cu new file mode 100644 index 000000000..ee16f92f1 --- /dev/null +++ b/tools/test/unit/core/tensor_ref.cu @@ -0,0 +1,220 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#include "cutlass_unit_test.h" + +#include "cutlass/tensor_ref.h" +#include "cutlass/matrix_traits.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorRef, basic_rank2) { + int const M = 8; + int const N = 16; + + int matrix_data[M * N] = {0}; + + cutlass::TensorRef matrix_ref(matrix_data, cutlass::make_Coord(N, 1)); + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + matrix_ref.at(cutlass::make_Coord(m, n)) = m * N + n; + } + } + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + EXPECT_EQ(matrix_data[m * N + n], int(m * N + n)); + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorRef, rank2_column_major) { + int const M = 8; + int const N = 8; + + int matrix_data[M * N]; + + cutlass::TensorRef ref(matrix_data, M); + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + ref.at(cutlass::make_Coord(m, n)) = m * N + n; + } + } + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + EXPECT_EQ(matrix_data[m + n * M], int(m * N + n)); + } + } +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorRef, rank2_row_major) { + int const M = 8; + int const N = 16; + + int matrix_data[M * N] = { 0 }; + + cutlass::TensorRef ref(matrix_data, N); + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + ref.at(cutlass::make_Coord(m, n)) = m * N + n; + } + } + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + EXPECT_EQ(matrix_data[m * N + n], int(m * N + n)); + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorRef, rank2_contiguous_dynamic) { + int const M = 8; + int const N = 16; + + typedef cutlass::TensorRef 
ContiguousTensorRef; + + cutlass::MatrixLayout::Kind layouts[] = { + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor + }; + + for (int i = 0; i < 2; ++i) { + + int matrix_data[M * N] = { 0 }; + + int ldm; + int row_stride; + int col_stride; + + if (layouts[i] == cutlass::MatrixLayout::kColumnMajor) { + row_stride = 1; + col_stride = M; + ldm = col_stride; + } + else { + row_stride = N; + col_stride = 1; + ldm = row_stride; + } + + // Use helper to determine stride vector from leading dimension + ContiguousTensorRef ref( + matrix_data, + cutlass::MatrixLayout::ContiguousLayout::stride(layouts[i], ldm)); + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + ref.at(cutlass::make_Coord(m, n)) = m * N + n; + } + } + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + EXPECT_EQ(matrix_data[m * row_stride + n * col_stride], int(m * N + n)); + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorRef, rank2_column_major_interleaved) { + int const M = 16; + int const N = 16; + int const kInterleave = 4; + + int matrix_data[M * N] = {0}; + + // Define the TensorRefMapFunc for a column-major interleaved matrix format + typedef cutlass::MatrixLayout::ColumnMajorInterleaved TensorRefMapFunc; + + // Construct a TensorRef + cutlass::TensorRef< + int, + 2, + TensorRefMapFunc> ref(matrix_data, TensorRefMapFunc::stride(M)); + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + ref.at(cutlass::make_Coord(m, n)) = m + n * M; + } + } + + // Verify + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; n += kInterleave) { + for (int i = 0; i < kInterleave; ++i) { + EXPECT_EQ(matrix_data[m * kInterleave + n * M + i], int(m + (n + i) * M)); + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorRef, rank2_row_major_interleaved) { + int const M = 16; + int const N = 16; 
+ int const kInterleave = 4; + + int matrix_data[M * N] = {0}; + + // Define the TensorRefMapFunc for a row-major interleaved matrix format + typedef cutlass::MatrixLayout::RowMajorInterleaved TensorRefMapFunc; + + // Construct a TensorRef + cutlass::TensorRef< + int, + 2, + TensorRefMapFunc> ref(matrix_data, TensorRefMapFunc::stride(N)); + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + ref.at(cutlass::make_Coord(m, n)) = m + n * M; + } + } + + // Verify + for (int m = 0; m < M; m += kInterleave) { + for (int n = 0; n < N; ++n) { + for (int i = 0; i < kInterleave; ++i) { + EXPECT_EQ(matrix_data[m * N + i + n * kInterleave], int((m + i) + n * M)); + } + } + } +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////// + diff --git a/tools/test/unit/core/tensor_view.cu b/tools/test/unit/core/tensor_view.cu new file mode 100644 index 000000000..8090f468d --- /dev/null +++ b/tools/test/unit/core/tensor_view.cu @@ -0,0 +1,235 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include "cutlass_unit_test.h" + +#include "cutlass/tensor_view.h" +#include "cutlass/matrix_traits.h" + +#include "tools/util/tensor_view_io.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorView, rank2_contiguous_dynamic) { + int const M = 8; + int const N = 16; + + typedef cutlass::TensorView ContiguousTensorView; + + cutlass::MatrixLayout::Kind layouts[] = { + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor + }; + + cutlass::Coord<2> bounds = cutlass::make_Coord(M - 2, N - 2); + + for (int i = 0; i < 2; ++i) { + + int matrix_data[M * N] = { 0 }; + + int ldm; + int row_stride; + int col_stride; + + if (layouts[i] == cutlass::MatrixLayout::kColumnMajor) { + row_stride = 1; + col_stride = M; + ldm = col_stride; + } + else { + row_stride = N; + col_stride = 1; + ldm = row_stride; + } + + // Use helper to determine stride vector from leading dimension + ContiguousTensorView view( + matrix_data, + cutlass::MatrixLayout::ContiguousLayout::stride(layouts[i], ldm), + bounds); + + ASSERT_TRUE(view.good()); + + for 
(int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + cutlass::Coord<2> coord = cutlass::make_Coord(m, n); + if (view.contains(coord)) { + view.at(coord) = m * N + n; + } + } + } + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + int expected = 0; + if (m < bounds[0] && n < bounds[1]) { + expected = int(m * N + n); + } + EXPECT_EQ(matrix_data[m * row_stride + n * col_stride], expected); + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Uncomment the following line to observe output from printing TensorView objects +// + +// #define OBSERVE_TENSORVIEW_IO // uncomment to enable printing + +#ifdef OBSERVE_TENSORVIEW_IO + +// This test construct a TensorView of rank=2 with matrix layouts known at runtime. This +// uses TensorRefMapFunc classes defined in cutlass/matrix_traits.h to define the mapping +// from logical tensor indices to storage in memory. +// +// Helpers in tools/util/tensor_view_io.h print both the logical TensorView and the +// linear memory of the tensor. 
+TEST(TensorView, contiguous) { + + int const M = 8; + int const N = 16; + + typedef cutlass::TensorView< + int32_t, + 2, + cutlass::MatrixLayout::ContiguousLayout> ContiguousTensorView; + + cutlass::MatrixLayout::Kind layouts[] = { + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor + }; + + cutlass::Coord<2> bounds = cutlass::make_Coord(M, N); + + for (int i = 0; i < 2; ++i) { + + int matrix_data[M * N] = { 0 }; + + int ldm; + int row_stride; + int col_stride; + + if (layouts[i] == cutlass::MatrixLayout::kColumnMajor) { + row_stride = 1; + col_stride = M; + ldm = col_stride; + } + else { + row_stride = N; + col_stride = 1; + ldm = row_stride; + } + + // Use helper to determine stride vector from leading dimension + ContiguousTensorView view( + matrix_data, + cutlass::MatrixLayout::ContiguousLayout::stride(layouts[i], ldm), + bounds); + + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + cutlass::Coord<2> coord = cutlass::make_Coord(m, n); + if (view.contains(coord)) { + view.at(coord) = m * N + n; + } + } + } + + std::cout << "---------\n"; + std::cout << (layouts[i] == cutlass::MatrixLayout::kColumnMajor ? + "Column-major:" : "Row-major:") << "\n\n"; + + std::cout << "Logical view:\n"; + std::cout.width(4); + std::cout << view << "\n" << std::endl; // Print TensorView object. + + std::cout << "Linear memory:"; + for (int idx = 0; idx < view.capacity(); ++idx) { + if (!(idx % (layouts[i] == cutlass::MatrixLayout::kColumnMajor ? M : N))) { + std::cout << std::endl; + } + std::cout << std::setw(4) << view.at(idx) << " "; + } + + std::cout << "\n" << std::endl; + } +} + +// This test is similar to the previous except it uses a column-major, interleaved data +// layout. The test prints both the logical representation (a typical column-major matrix) +// and a representation of linear memory. 
+// +// Note, the interleave=4 structure implies that every four consecutive elements in the +// same row shall be adjacent in memory followed by the next row. +TEST(TensorView, rank2_column_major_interleaved) { + int const M = 16; + int const N = 16; + int const kInterleave = 4; + + int matrix_data[M * N] = {0}; + + cutlass::Coord<2> bounds = cutlass::make_Coord(M, N); + + // Define the TensorRefMapFunc for a column-major interleaved matrix format + typedef cutlass::MatrixLayout::ColumnMajorInterleaved TensorRefMapFunc; + + // Define a TensorView of rank=2 using the column-major interleaved mapping function + typedef cutlass::TensorView< + int, + 2, + TensorRefMapFunc> InterleavedTensorView; + + InterleavedTensorView view( + matrix_data, + TensorRefMapFunc::stride(M), + bounds); + + // Initialize + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + view.at(cutlass::make_Coord(m, n)) = m + n * M; + } + } + + // Print logical view + std::cout << "Column-major, interleave=" << kInterleave << " (logical view):\n"; + + std::cout << std::setw(4) << view << "\n" << std::endl; + + // Now define a linear view of the same data in memory + typedef cutlass::TensorView LinearTensorView; + + LinearTensorView linear_view(matrix_data, cutlass::make_Coord(N), bounds); + + std::cout << "Linear view in memory:\n"; + std::cout << std::setw(4) << linear_view << std::endl; +} + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + + diff --git a/tools/test/unit/core/tile_iterator.cu b/tools/test/unit/core/tile_iterator.cu index 144e4393f..c7f959812 100644 --- a/tools/test/unit/core/tile_iterator.cu +++ b/tools/test/unit/core/tile_iterator.cu @@ -3,14 +3,14 @@ * * Redistribution and use in source and binary forms, with or without modification, are permitted * provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright notice, this list of - * conditions and 
the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright notice, this list of - * conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific prior written - * permission. + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND @@ -22,29 +22,37 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "tools/util/host_matrix.h" +#include "tools/util/tensor_view_io.h" +#include "cutlass/shape.h" +#include "cutlass/predicate_vector.h" +#include "cutlass/tile_iterator.h" +#include "cutlass/tile_traits_standard.h" +#include "cutlass/iterator_access.h" //////////////////////////////////////////////////////////////////////////////////////////////////// namespace test { - template - __global__ void load_store_global( - typename cutlass::TileLoadIterator::Scalar const *input, - typename cutlass::TileStoreIterator::Scalar *output - ) { +template +__global__ void load_store_global( + typename cutlass::TileLoadIterator::Scalar const *input, + typename cutlass::TileStoreIterator::Scalar *output, + int kW, + int kH, + typename cutlass::TileStoreIterator::Scalar identity = 0 + ) { + /// Load iterator typedef cutlass::TileLoadIterator LoadIterator; + /// Store iterator typedef cutlass::TileStoreIterator StoreIterator; + /// Predicate vector + typedef typename LoadIterator::PredicateVector PredicateVector; typename LoadIterator::Params load_params; typename StoreIterator::Params store_params; @@ -56,98 +64,144 @@ namespace test { LoadIterator load_iterator(load_params); StoreIterator store_iterator(store_params); + PredicateVector predicates; + + load_iterator.initialize_predicates(predicates.begin(), cutlass::make_Coord(1, kH, kW)); typename LoadIterator::Fragment fragment; - load_iterator.load(fragment); - store_iterator.store(fragment); + load_iterator.load_post_increment(fragment, predicates.begin()); + store_iterator.store_post_increment(fragment); +} + +/// Launches the load_store_global test +template +void run_load_store_global(int kW, int kH) { + + typedef cutlass::TileTraitsStandard Traits; + + typedef typename cutlass::TypeTraits::device_type 
ScalarDevice; + + cutlass::HostMatrix input; + cutlass::HostMatrix output; + + input.resize(cutlass::make_Coord(Tile::kW, Tile::kH), cutlass::MatrixLayout::kColumnMajor); + output.resize(cutlass::make_Coord(Tile::kW, Tile::kH), cutlass::MatrixLayout::kColumnMajor); + + input.fill_linear(cutlass::make_Coord(1, Tile::kW)); + output.fill(0); + + test::load_store_global <<< + dim3(1, 1, 1), + dim3(kThreadsPerThreadBlock, 1) + >>>(input.device_data(), output.device_data(), kW, kH); + + cudaError_t result = cudaDeviceSynchronize(); + + ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) + << "\n"; + output.sync_host(); + + bool passed = true; + for(int i = 0; i < Tile::kW; ++i) { + for(int j = 0; j < Tile::kH; ++j) { + if(i < kW && j < kH && output.at(cutlass::make_Coord(i, j)) != Scalar(Tile::kW*j+i)){ + std::cout << "FAILED: (" << i << ", " << j + << ") -- expected: " << (Tile::kW*j+i) + << ", actual: " << output.at(cutlass::make_Coord(i, j)) + << std::endl; + passed = false; + break; + } + } } + EXPECT_TRUE(passed); +} + //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(TileIterator, tile_128x8_contiguous) { - - static int const M = 128; - static int const N = 1; - static int const K = 8; - - static int const kThreads = M; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::TileTraitsStandard, kThreads> Traits; - - cutlass::HostTensor input; - cutlass::HostTensor output; - - input.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - output.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - input.fill_linear(cutlass::make_Coord(1, 1, ThreadBlockTile::kW, 1)); - output.fill(0); - - test::load_store_global< Traits, float ><<< - dim3(1,1,1), - dim3(kThreads, 1) - >>>( - input.device_data(), - output.device_data() - ); - - cudaError_t result = cudaDeviceSynchronize(); 
- ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); - + run_load_store_global, 128>(128, 8); } + //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(TileIterator, tile_128x8_rake) { - - static int const M = 128; - static int const N = 1; - static int const K = 8; - - static int const kThreads = 32; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::TileTraitsStandard, kThreads> Traits; - - cutlass::HostTensor input; - cutlass::HostTensor output; - - input.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - output.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - input.fill_linear(cutlass::make_Coord(1, 1, ThreadBlockTile::kW, 1)); - output.fill(0); - - test::load_store_global< Traits, float ><<< - dim3(1,1,1), - dim3(kThreads, 1) - >>>( - input.device_data(), - output.device_data() - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); - + run_load_store_global, 32>(128, 8); } + //////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_127x8_contiguous) { + run_load_store_global, 128>(127, 8); } +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_129x8_contiguous) { + run_load_store_global, 128>(129, 8); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_112x8_contiguous) { + run_load_store_global, 128>(112, 8); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_67x8_contiguous) { + run_load_store_global, 128>(67, 8); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_113x7_contiguous) { + run_load_store_global, 128>(113, 7); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_113x10_contiguous) { + run_load_store_global, 128>(113, 10); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_131x7_contiguous) { + run_load_store_global, 128>(131, 7); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_131x9_contiguous) { + run_load_store_global, 128>(131, 9); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Half +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_128x8_contiguous_f16) { + run_load_store_global, 128>(128, 8); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Double +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_128x8_contiguous_f64) { + run_load_store_global, 128>(128, 8); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Int +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TileIterator, tile_128x8_contiguous_s32) { + run_load_store_global, 128>(128, 8); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +} // 
namespace test diff --git a/tools/test/unit/core/zip_tile_iterator.cu b/tools/test/unit/core/zip_tile_iterator.cu new file mode 100644 index 000000000..2117e012d --- /dev/null +++ b/tools/test/unit/core/zip_tile_iterator.cu @@ -0,0 +1,173 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "tools/util/host_matrix.h" +#include "tools/util/tensor_view_io.h" +#include "cutlass/shape.h" +#include "cutlass/predicate_vector.h" +#include "cutlass/tile_iterator.h" +#include "cutlass/tile_traits_standard.h" + +#include "cutlass/zip_tile_iterator.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace test { + +/// Kernel which can use tile iterators and zip iterators +template +__global__ void zip_iterator_kernel( + typename LoadIterator::Params load_params, + typename StoreIterator::Params store_params) { + + LoadIterator load_iterator(load_params); + StoreIterator store_iterator(store_params); + + typename LoadIterator::Fragment fragment; + + load_iterator.load_post_increment(fragment); + store_iterator.store_post_increment(fragment); +} + +} // namespace test + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Test framework +template +struct ZipIteratorTest { + + // + // Type definitions + // + + static int const kThreadCount = 128; + + typedef cutlass::TileTraitsStandard TileTraits; + + typedef cutlass::TileLoadIterator ScalarLoadIterator; + typedef cutlass::TileStoreIterator ScalarStoreIterator; + + typedef cutlass::ZipTileIterator ZipLoadIterator; + typedef cutlass::ZipTileIterator ZipStoreIterator; + + // + // Data members + // + + cutlass::HostMatrix tensor_source_real; + cutlass::HostMatrix tensor_source_imag; + + cutlass::HostMatrix tensor_dest_real; + cutlass::HostMatrix tensor_dest_imag; + + // + // Methods + // + + /// Ctor + ZipIteratorTest() { + + tensor_source_real.resize(cutlass::make_Coord(Shape::kH, Shape::kW), cutlass::MatrixLayout::kRowMajor); + tensor_source_imag.resize(cutlass::make_Coord(Shape::kH, Shape::kW), cutlass::MatrixLayout::kRowMajor); + 
tensor_dest_real.resize(cutlass::make_Coord(Shape::kH, Shape::kW), cutlass::MatrixLayout::kRowMajor); + tensor_dest_imag.resize(cutlass::make_Coord(Shape::kH, Shape::kW), cutlass::MatrixLayout::kRowMajor); + } + + /// Runs test + void run() { + + tensor_source_real.fill_sequential(); + tensor_source_imag.fill_sequential(); + + tensor_dest_real.fill(0); + tensor_dest_imag.fill(0); + + tensor_source_real.sync_device(); + tensor_source_imag.sync_device(); + tensor_dest_real.sync_device(); + tensor_dest_imag.sync_device(); + + + typename ZipLoadIterator::Params load_params; + typename ZipStoreIterator::Params store_params; + + load_params.first.initialize( + tensor_source_real.device_data(), + 0, + tensor_source_real.leading_dim(), + 1 + ); + + load_params.second.initialize( + tensor_source_imag.device_data(), + 0, + tensor_source_real.leading_dim(), + 1 + ); + + store_params.first.initialize( + tensor_dest_real.device_data(), + 0, + tensor_source_real.leading_dim(), + 1 + ); + + store_params.second.initialize( + tensor_dest_imag.device_data(), + 0, + tensor_source_real.leading_dim(), + 1 + ); + + /// Launch kernel + test::zip_iterator_kernel<<< + dim3(1,1), + dim3(kThreadCount, 1) + >>>( + load_params, + store_params + ); + + cudaError_t result = cudaGetLastError(); + EXPECT_EQ(result, cudaSuccess) << "Error on kernel launch: " << cudaGetErrorString(result); + + tensor_dest_real.sync_host(); + tensor_dest_imag.sync_host(); + + // Verify equivalence + EXPECT_TRUE(tensor_dest_real.bit_equals(tensor_source_real)); + EXPECT_TRUE(tensor_dest_imag.bit_equals(tensor_source_imag)); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(ZipTileIterator, tile_128x8) { + ZipIteratorTest >().run(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + diff --git a/tools/test/unit/cutlass_unit_test.cpp b/tools/test/unit/cutlass_unit_test.cpp index 
ec78c8a65..be9bd15bd 100644 --- a/tools/test/unit/cutlass_unit_test.cpp +++ b/tools/test/unit/cutlass_unit_test.cpp @@ -29,19 +29,65 @@ #include #include +/// Sets flags for Unit test void set_gtest_flag() { // Default flags can be overwritten by --gtest_filter from commandline + cudaError_t err; + + int cudaDeviceId; + err = cudaGetDevice(&cudaDeviceId); + if (cudaSuccess != err) { + std::cerr << "*** Error: Could not detect active GPU device ID" + << " [" << cudaGetErrorString(err) << "]" << std::endl; + exit(1); + } + cudaDeviceProp deviceProperties; - cudaGetDeviceProperties(&deviceProperties, 0); + err = cudaGetDeviceProperties(&deviceProperties, cudaDeviceId); + if (cudaSuccess != err) { + std::cerr << "*** Error: Could not get device properties for GPU " << cudaDeviceId << " [" + << cudaGetErrorString(err) << "]" << std::endl; + exit(1); + } int deviceMajorMinor = deviceProperties.major * 10 + deviceProperties.minor; - if (deviceMajorMinor < 53) - ::testing::GTEST_FLAG(filter) = "-*Igemm*:*Hgemm*:*mma*"; - else if (deviceMajorMinor < 61) - ::testing::GTEST_FLAG(filter) = "-*Igemm*:*mma*"; - else if (deviceMajorMinor < 70) - ::testing::GTEST_FLAG(filter) = "-*mma*"; + // Defines text filters for each GEMM kernel based on minimum supported compute capability + struct { + + /// Unit test filter string + char const *filter; + + /// Minimum compute capability for the kernels in the named test + int compute_capability; + + /// If true, the tests are enabled strictly for one compute capability + bool experimental; + } test_filters[] = { + { "Sgemm*", 50, false }, + { "Dgemm*", 60, false }, + { "Fp16_sgemm*", 60, false }, + { "Hgemm*", 60, false }, + { "Igemm*", 61, false }, + { "WmmaGemm*", 70, false }, + { "WmmaInt8*", 72, false }, + { "WmmaInt4*", 75, true }, + { "WmmaBinary*", 75, true }, + { 0, 0, false } + }; + + // Set negative test filters + std::stringstream ss; + ss << "-"; + for (int i = 0, j = 0; test_filters[i].filter; ++i) { + if (deviceMajorMinor < 
test_filters[i].compute_capability || + (test_filters[i].experimental && deviceMajorMinor != test_filters[i].compute_capability)) { + + ss << (j++ ? ":" : "") << test_filters[i].filter; + } + } + + ::testing::GTEST_FLAG(filter) = ss.str(); } int main(int argc, char* arg[]) { diff --git a/tools/test/unit/cutlass_unit_test.h b/tools/test/unit/cutlass_unit_test.h index 0d559ca5c..2ffced582 100644 --- a/tools/test/unit/cutlass_unit_test.h +++ b/tools/test/unit/cutlass_unit_test.h @@ -28,3 +28,4 @@ #pragma diag_suppress boolean_controlling_expr_is_constant #include #pragma diag_warning boolean_controlling_expr_is_constant +#pragma warning( disable : 4503) diff --git a/tools/test/unit/gemm/batched_strided_dgemm_128x128x8.cu b/tools/test/unit/gemm/batched_strided_dgemm_128x128x8.cu new file mode 100644 index 000000000..8b0bc1635 --- /dev/null +++ b/tools/test/unit/gemm/batched_strided_dgemm_128x128x8.cu @@ -0,0 +1,103 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/dgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(dgemm_strided_batched_128x128x8, dgemm_256x384x64x3_nn) { + typedef cutlass::gemm::DgemmTraits > + DgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(dgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nn) { + typedef cutlass::gemm::DgemmTraits > + DgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(dgemm_strided_batched_128x128x8, dgemm_256x384x64x3_nt) { + typedef cutlass::gemm::DgemmTraits > + DgemmTraits; + //think 
about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(dgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nt) { + typedef cutlass::gemm::DgemmTraits > + DgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(dgemm_strided_batched_128x128x8, dgemm_256x384x64x3_tn) { + typedef cutlass::gemm::DgemmTraits > + DgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(dgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tn) { + typedef cutlass::gemm::DgemmTraits > + DgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(dgemm_strided_batched_128x128x8, dgemm_256x384x64x3_tt) { + typedef cutlass::gemm::DgemmTraits > + DgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + diff --git a/tools/test/unit/gemm/batched_strided_hgemm_128x128x8.cu b/tools/test/unit/gemm/batched_strided_hgemm_128x128x8.cu new file mode 100644 index 000000000..4738d29f9 --- /dev/null +++ b/tools/test/unit/gemm/batched_strided_hgemm_128x128x8.cu @@ -0,0 +1,112 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA 
CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/hgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_256x384x64x3_nn) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_128x384x192x2_nn) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_256x384x64x3_nt) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_128x384x192x2_nt) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_256x384x64x3_tn) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 
/*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_128x384x192x2_tn) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_256x384x64x3_tt) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Hgemm_strided_batched_128x128x8, hgemm_128x384x192x2_tt) { + typedef cutlass::gemm::HgemmTraits > + HgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu b/tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu new file mode 100644 index 000000000..ffeba34f4 --- /dev/null +++ b/tools/test/unit/gemm/batched_strided_sgemm_128x128x8.cu @@ -0,0 +1,135 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nn) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nn) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_127x384x192x2_nn) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(127/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_127x388x190x2_nn) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(127/*m*/, 388/*n*/, 190/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_nt) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 
/*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_nt) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tn) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tn) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_256x384x64x3_tt) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(256/*m*/, 384/*n*/, 64/*k*/, 3 /*batch_size*/); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_strided_batched_128x128x8, sgemm_128x384x192x2_tt) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + //think about using run_gemm directly + run_batched_strided_gemm(128/*m*/, 384/*n*/, 192/*k*/, 2 /*batch_size*/); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/binary_gemm.h b/tools/test/unit/gemm/binary_gemm.h new file mode 100644 index 000000000..927413838 --- /dev/null +++ b/tools/test/unit/gemm/binary_gemm.h @@ -0,0 +1,77 @@ +/*************************************************************************************************** +* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +* +* Redistribution and use in source and binary forms, with or without modification, are permitted +* provided that the following conditions are met: +* * Redistributions of source code must retain the above copyright notice, this list of +* conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, this list of +* conditions and the following disclaimer in the documentation and/or other materials +* provided with the distribution. +* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +* to endorse or promote products derived from this software without specific prior written +* permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +* STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +**************************************************************************************************/ + +#include "cutlass/cutlass.h" + +template +static void run_binary_gemm(int m, int n, int k, int alpha = 1, int beta = 1) { + typedef cutlass::gemm::Gemm Gemm; + typename Gemm::Params params; + + test::GemmTestbed, // AType + cutlass::Vector, // BType + int32_t, // CType + int32_t, // Accumulator + int // Scalar + > + testbed(m, + n, + k / 32, + test::convert(GemmTraits_::kLayoutA), + test::convert(GemmTraits_::kLayoutB), + alpha, + beta); + + // Initializes the input vectors for computation + testbed.initialize_binary(); + + // Compute the reference result on the host (CPU) + testbed.compute_host(); + + params.initialize(testbed.M(), + testbed.N(), + testbed.K() * 32, + testbed.alpha, + testbed.ptr_A(), + testbed.lda(), + testbed.ptr_B(), + testbed.ldb(), + testbed.beta, + testbed.ptr_C_initial(), + testbed.ldc(), + testbed.ptr_computed(), + testbed.ldc()); + + Gemm::launch(params); + + cudaError_t result = cudaDeviceSynchronize(); + ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) + << "\n"; + + testbed.computed.sync_host(); + + // Check the results + ASSERT_TRUE(testbed.computed.bit_equals(testbed.ref_host)); +} diff --git a/tools/test/unit/gemm/dgemm.cu b/tools/test/unit/gemm/dgemm.cu index be78450b9..ebfeba920 100644 --- a/tools/test/unit/gemm/dgemm.cu +++ b/tools/test/unit/gemm/dgemm.cu @@ -24,11 +24,11 @@ **************************************************************************************************/ #include #include -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/dgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -40,6 +40,7 @@ TEST(Dgemm_64x32x8, 
dgemm_64x32x8_nt) { run_gemm(64, 32, 8); } + TEST(Dgemm_64x32x8, dgemm_256x128x64_nt) { typedef cutlass::gemm::DgemmTraits, + cutlass::gemm::LinearScalingDevicePtr + > + SgemmTraits; + + // Define a GEMM problem size + int const m = 1025; + int const n = 512; + int const k = 128; + + // Define scalars + float alpha_host = 3; + float beta_host = 2; + + // Define a device-backed tensor to contain the scalars + cutlass::HostTensor device_scalars(2); + + // Copy scalar values to device memory for device-ptr mode + device_scalars.at(0) = alpha_host; + device_scalars.at(1) = beta_host; + device_scalars.sync_device(); + + // Construct a GemmTestbed instance + test::GemmTestbed< + float, // AType + float, // BType + float, // CType + float, // Accumulator + float // Scalar + > + testbed(m, + n, + k, + test::convert(SgemmTraits::kLayoutA), + test::convert(SgemmTraits::kLayoutB), + alpha_host, + beta_host); + + testbed.initialize(); + + // + // Construct a CUTLASS GEMM and initialize parameters + // + typedef typename SgemmTraits::KernelClass Gemm; + typename Gemm::Params params; + + params.initialize(testbed.M(), + testbed.N(), + testbed.K(), + 0, // alpha ignored + testbed.ptr_A(), + testbed.lda(), + testbed.ptr_B(), + testbed.ldb(), + 0, // beta ignored + testbed.ptr_C_initial(), + testbed.ldc(), + testbed.ptr_computed(), + testbed.ldc()); + + // Explicitly call the epilogue functor's initialize method to pass additional arguments + params.epilogue.functor.initialize( + device_scalars.device_data() + 0, // pointer to alpha in device memory + device_scalars.device_data() + 1); // pointer to beta in device memory + + // Launch the CUTLASS SGEMM kernel + Gemm::launch(params); + + // Report any errors + cudaError_t result = cudaDeviceSynchronize(); + ASSERT_EQ(result, cudaSuccess) + << "\nCUDA kernel launch error: " << cudaGetErrorString(result) + << "\n"; + + // Verify result + ASSERT_TRUE(testbed.verify_with_cublas()); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/fp16_sgemm_fp16_128x128x16.cu b/tools/test/unit/gemm/fp16_sgemm_fp16_128x128x16.cu new file mode 100644 index 000000000..41ed3c855 --- /dev/null +++ b/tools/test/unit/gemm/fp16_sgemm_fp16_128x128x16.cu @@ -0,0 +1,321 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/fp16_sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_nn) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_nt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_tn) { + /*A, B, C, D are half 
typed, accumulator is always float for sgemm + alpha and beta are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_tt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_nn) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_nt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_tn) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta 
are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp16_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_tt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp16*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + half /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//alpha and beta are both fp32 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_nn) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_nt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_tn) { + /*A, B, C, 
D are half typed, accumulator is always float for sgemm + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x128x16_tt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_nn) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_nt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_tn) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + 
alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp16_128x128x16, fp16_sgemm_fp16_128x112x17_tt) { + /*A, B, C, D are half typed, accumulator is always float for sgemm + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + half, /*C type*/ + half, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + diff --git a/tools/test/unit/gemm/fp16_sgemm_fp32_128x128x16.cu b/tools/test/unit/gemm/fp16_sgemm_fp32_128x128x16.cu new file mode 100644 index 000000000..d45ecb7bd --- /dev/null +++ b/tools/test/unit/gemm/fp16_sgemm_fp32_128x128x16.cu @@ -0,0 +1,174 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/fp16_sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + + + + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x128x16_nn) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x128x16_nt) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x128x16_tn) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x128x16_tt) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 128, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x112x17_nn) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x112x17_nt) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 
112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x112x17_tn) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Fp16_sgemm_alphaFp32_fp32_128x128x16, fp16_sgemm_fp32_128x112x17_tt) { + /*A, B are half typed, accumulator is always float for sgemm, C, D are float typed + alpha and beta are both fp32*/ + typedef cutlass::gemm::Fp16SgemmSgemmTraits, + half, /*A type*/ + half, /*B type*/ + float, /*C type*/ + float, /*D type*/ + float /*alpha, beta type*/ + > + SgemmTraits; + run_gemm(128, 112, 17); +} diff --git a/tools/test/unit/gemm/gemm_nvrtc.cu b/tools/test/unit/gemm/gemm_nvrtc.cu index 0607b2173..89dfe1a61 100644 --- a/tools/test/unit/gemm/gemm_nvrtc.cu +++ b/tools/test/unit/gemm/gemm_nvrtc.cu @@ -22,13 +22,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/dgemm_traits.h" +#include "cutlass/gemm/igemm_traits.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/gemm_nvrtc.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/gemm_nvrtc.h b/tools/test/unit/gemm/gemm_nvrtc.h index a8710313f..fae1e7d6f 100644 --- a/tools/test/unit/gemm/gemm_nvrtc.h +++ b/tools/test/unit/gemm/gemm_nvrtc.h @@ -25,10 +25,10 @@ #define NVRTC_GET_TYPE_NAME 1 -#include +#include "cutlass/cutlass.h" #include -#include +#include "tools/nvrtc/cutlass/nvrtc/environment.h" #include static inline bool check_nvrtc_error(nvrtcResult error) { @@ -76,13 +76,13 @@ static __host__ void run_gemm_nvrtc( nvrtcResult result_nvrtc; nvrtcProgram program; static char const *src = - "#include \n" - "#include \n" - "#include \n" - "#include \n" + "#include "cutlass/gemm/gemm.h"\n" + "#include "cutlass/gemm/sgemm_traits.h"\n" + "#include "cutlass/gemm/dgemm_traits.h"\n" + "#include "cutlass/gemm/igemm_traits.h"\n" #if defined(CUTLASS_NVRTC_HAS_FP16) - "#include \n" - "#include \n" + "#include "cutlass/gemm/hgemm_traits.h"\n" + "#include "cutlass/gemm/wmma_gemm_traits.h"\n" #endif ; diff --git a/tools/test/unit/gemm/gemm_shared_mem_layouts.cu b/tools/test/unit/gemm/gemm_shared_mem_layouts.cu deleted file mode 100644 index 6da198df3..000000000 --- a/tools/test/unit/gemm/gemm_shared_mem_layouts.cu +++ /dev/null @@ -1,621 +0,0 @@ -/*************************************************************************************************** - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without modification, are permitted - * provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright notice, this list of - * conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - **************************************************************************************************/ -#include -#include -#include -#include -#include -#include -#include -#include - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace test { - - // M/N/K struct. 
- struct GemmDesc { - int m, n, k; - inline __host__ __device__ GemmDesc(int m_, int n_, int k_) : m(m_), n(n_), k(k_) {} - }; - - /// Simple test to load from global memory and store to shared memory - - // Loading from global memory and storing to shared memory for A - template - __global__ void gemm_load_global_store_shared_a( - typename Traits::GlobalLoadStreamA::Scalar *output, - typename Traits::GlobalLoadStreamA::Scalar const *input, - int M, - int N, - int K, - int ldm, - int skew) { - - //Create shared memory. - __shared__ typename Traits::SharedStorage shared_storage; - - // Create those iterators. - typedef typename Traits::GlobalLoadStreamA GlobalLoadStreamA; - - typename GlobalLoadStreamA::Params global_load_params; - GemmDesc desc(M, N, K); - global_load_params.initialize(desc, input, ldm); - - GlobalLoadStreamA stream_a(global_load_params, shared_storage.main_loop.stream_a.global, M, N, K, cutlass::make_Coord(0, 0, 0)); - stream_a.copy(); - stream_a.commit(); - - // store barrier - __syncthreads(); - - // one thread writes everything out - if (threadIdx.x == 0) { - for (int i = 0; i < (M+skew)*K; ++i) { - output[i] = shared_storage.main_loop.stream_a.shared.scalars[i]; - } - } - - } - - // Loading from global memory and storing to shared memory for B - template - __global__ void gemm_load_global_store_shared_b( - typename Traits::GlobalLoadStreamB::Scalar *output, - typename Traits::GlobalLoadStreamB::Scalar const *input, - int M, - int N, - int K, - int ldm, - int skew) { - - //Create shared memory. - __shared__ typename Traits::SharedStorage shared_storage; - - // Create those iterators. 
- typedef typename Traits::GlobalLoadStreamB GlobalLoadStreamB; - typename GlobalLoadStreamB::Params global_load_params; - GemmDesc desc(M, N, K); - global_load_params.initialize(desc, input, ldm); - - GlobalLoadStreamB stream_b(global_load_params, shared_storage.main_loop.stream_b.global, M, N, K, cutlass::make_Coord(0, 0, 0)); - stream_b.copy(); - stream_b.commit(); - - // store barrier - __syncthreads(); - - // one thread writes everything out - if (threadIdx.x == 0) { - for (int i = 0; i < (N+skew)*K; ++i) { - output[i] = shared_storage.main_loop.stream_b.shared.scalars[i]; - } - } - - } - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, A_float_contiguous) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::SgemmTraits - SgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 0; - - input.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - output.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - - input.fill_linear(cutlass::make_Coord(1, 1, ThreadBlockTile::kW, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_a< SgemmTraits ><<< - dim3(1,1,1), - dim3(SgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - M, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, A_float_crosswise) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef 
cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::SgemmTraits - SgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 4; - - input.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kRowMajor); - output.resize_matrix(ThreadBlockTile::kW + skew, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - - input.fill_linear(cutlass::make_Coord(1, ThreadBlockTile::kD, 1, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_a< SgemmTraits ><<< - dim3(1,1,1), - dim3(SgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - K, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); - -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, B_float_contiguous) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::SgemmTraits - SgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 0; - - input.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, - cutlass::MatrixLayout::kRowMajor); - output.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, - cutlass::MatrixLayout::kRowMajor); - - - input.fill_linear(cutlass::make_Coord(1, ThreadBlockTile::kH, 1, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_b< SgemmTraits ><<< - dim3(1,1,1), - dim3(SgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - N, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - - output.sync_host(); - - 
EXPECT_TRUE(input.bit_equals(output)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, B_float_crosswise) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::SgemmTraits - SgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 4; - - input.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, - cutlass::MatrixLayout::kColumnMajor); - output.resize_matrix(ThreadBlockTile::kD + skew, ThreadBlockTile::kH, - cutlass::MatrixLayout::kRowMajor); - - - input.fill_linear(cutlass::make_Coord(1, 1, ThreadBlockTile::kD, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_b< SgemmTraits ><<< - dim3(1,1,1), - dim3(SgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - K, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, A_double_contiguous) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::DgemmTraits - DgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 0; - - input.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - output.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - - input.fill_linear(cutlass::make_Coord(1, 1, ThreadBlockTile::kW, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_a< DgemmTraits ><<< - dim3(1,1,1), - 
dim3(DgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - M, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, A_double_crosswise) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::DgemmTraits - DgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 2; - - input.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kRowMajor); - output.resize_matrix(ThreadBlockTile::kW + skew, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - - input.fill_linear(cutlass::make_Coord(1, ThreadBlockTile::kD, 1, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_a< DgemmTraits ><<< - dim3(1,1,1), - dim3(DgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - K, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); - -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, B_double_contiguous) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::DgemmTraits - DgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 0; - - input.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, - 
cutlass::MatrixLayout::kRowMajor); - output.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, - cutlass::MatrixLayout::kRowMajor); - - - input.fill_linear(cutlass::make_Coord(1, ThreadBlockTile::kH, 1, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_b< DgemmTraits ><<< - dim3(1,1,1), - dim3(DgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - N, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, B_double_crosswise) { - - static int const M = 64; - static int const N = 64; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::DgemmTraits - DgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 2; - - input.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, - cutlass::MatrixLayout::kColumnMajor); - output.resize_matrix(ThreadBlockTile::kD + skew, ThreadBlockTile::kH, - cutlass::MatrixLayout::kRowMajor); - - - input.fill_linear(cutlass::make_Coord(1, 1, ThreadBlockTile::kD, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_b< DgemmTraits ><<< - dim3(1,1,1), - dim3(DgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - K, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, A_half_crosswise) { - - static int const M = 128; - static int 
const N = 128; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::HgemmTraits - HgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 8; - - input.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, - cutlass::MatrixLayout::kRowMajor); - output.resize_matrix(ThreadBlockTile::kW + skew, ThreadBlockTile::kD, - cutlass::MatrixLayout::kColumnMajor); - - - input.fill_linear(cutlass::make_Coord(1, ThreadBlockTile::kD, 1, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_a< HgemmTraits ><<< - dim3(1,1,1), - dim3(HgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - K, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) - << "\n"; - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); - -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(GemmSharedMemLayout, B_half_crosswise) { - - static int const M = 128; - static int const N = 128; - static int const K = 8; - - typedef cutlass::Shape ThreadBlockTile; - - typedef cutlass::gemm::HgemmTraits - HgemmTraits; - - - cutlass::HostTensor input; - cutlass::HostTensor output; - int skew = 8; - - input.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, - cutlass::MatrixLayout::kColumnMajor); - output.resize_matrix(ThreadBlockTile::kD + skew, ThreadBlockTile::kH, - cutlass::MatrixLayout::kRowMajor); - - - input.fill_linear(cutlass::make_Coord(1, 1, ThreadBlockTile::kD, 1)); - - output.fill(0); - - test::gemm_load_global_store_shared_b< HgemmTraits ><<< - dim3(1,1,1), - dim3(HgemmTraits::kThreads, 1) - >>>( - output.device_data(), - input.device_data(), - M, - N, - K, - K, - skew - ); - - cudaError_t result = cudaDeviceSynchronize(); - ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << 
cudaGetErrorString(result) - << "\n"; - - output.sync_host(); - - EXPECT_TRUE(input.bit_equals(output)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -} - diff --git a/tools/test/unit/gemm/gemm_testbed.h b/tools/test/unit/gemm/gemm_testbed.h index 47e90f61c..e937742ce 100644 --- a/tools/test/unit/gemm/gemm_testbed.h +++ b/tools/test/unit/gemm/gemm_testbed.h @@ -32,30 +32,39 @@ #include #include #include +#include #include -#include -#include +#include "cutlass/matrix_traits.h" +#include "cutlass/util/platform.h" +#include "cutlass/gemm/gemm_coord.h" -#include -#include -#include +#include "tools/util/host_matrix.h" +#include "tools/util/host_matrix_view.h" +#include "tools/util/tensor_view_io.h" +#include "tools/util/type_traits.h" + +#include "tools/util/reference/host/gemm.h" +#include "tools/util/reference/host/tensor_elementwise.h" + +////////////////////////////////////////////////////////////////////////////////////////// namespace cutlass { -//////////////////////////////////////////////////////////////////////////////////////////////////// - template struct WmmaMatrix; -} + +} // namespace cutlass + +////////////////////////////////////////////////////////////////////////////////////////// namespace test { -//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// template struct GemmTestbedTraits : public cutlass::TypeTraits {}; @@ -66,14 +75,39 @@ template struct GemmTestbedTraits > { static cudaDataType_t const cublas_type = cutlass::TypeTraits::cublas_type; - typedef Scalar_ host_type; - typedef Scalar_ device_type; + typedef typename cutlass::TypeTraits::host_type host_type; + typedef typename cutlass::TypeTraits::device_type device_type; static inline double remove_negative_zero(double x) { return x == -0.0 ? 
0.0 : x; } static inline double to_print(double x) { return x; } }; -//////////////////////////////////////////////////////////////////////////////////////////////////// +inline cublasOperation_t convert(cutlass::MatrixLayout::Kind layout) { + switch (layout) { + case cutlass::MatrixLayout::kRowMajor: + return CUBLAS_OP_T; + case cutlass::MatrixLayout::kColumnMajor: + return CUBLAS_OP_N; + default: + break; + } + return CUBLAS_OP_N; +} +inline cutlass::MatrixLayout::Kind convert(cublasOperation_t transform) { + switch (transform) { + case CUBLAS_OP_T: + return cutlass::MatrixLayout::kRowMajor; + case CUBLAS_OP_N: + return cutlass::MatrixLayout::kColumnMajor; + default: + break; + } + return cutlass::MatrixLayout::kColumnMajor; +} + +////////////////////////////////////////////////////////////////////////////////////////// + +/// Testbed for evaluating real-valued GEMMs template struct GemmTestbed { // @@ -81,13 +115,13 @@ struct GemmTestbed { // /// Host tensor for operand A - typedef cutlass::HostTensor HostTensorA; + typedef cutlass::HostMatrix HostMatrixA; /// Host tensor for operand B - typedef cutlass::HostTensor HostTensorB; + typedef cutlass::HostMatrix HostMatrixB; /// Host tensor for operand C - typedef cutlass::HostTensor HostTensorC; + typedef cutlass::HostMatrix HostMatrixC; /// Functor to print errors struct PrintErrors { @@ -98,18 +132,18 @@ struct GemmTestbed { std::ostream& out; /// Reference tensor view - cutlass::HostTensorView const& reference; + HostMatrixC const& reference; /// Computed tensor view - cutlass::HostTensorView const& experimental; + HostMatrixC const& experimental; /// Errors greater than or this amount result in printing integer_t ulps_threshold; /// PrintErrors(std::ostream& _out, - cutlass::HostTensorView const& _reference, - cutlass::HostTensorView const& _experimental, + HostMatrixC const& _reference, + HostMatrixC const& _experimental, integer_t _ulps_threshold = 1) : out(_out), reference(_reference), @@ -117,7 +151,7 @@ 
struct GemmTestbed { ulps_threshold(_ulps_threshold) {} /// Compares one element - void operator()(CType const& element, typename HostTensorC::Coord_t coord) { + void operator()(CType const& element, typename HostMatrixC::TensorCoord coord) { CType exp = experimental.at(coord); CType ref = reference.at(coord); @@ -165,6 +199,20 @@ struct GemmTestbed { bool only_ones; }; + template + struct RandomBitGenerator { + RandomBitGenerator(int seed = -1) { srand(seed); } + + T operator()() { + uint32_t val = 0; + for (int i = 0; i < 32; i++) { + val |= rand() % 2; + val <<= 1; + } + return T(val); + } + }; + // // Data members // @@ -178,29 +226,32 @@ struct GemmTestbed { /// cuBLAS GEMM algorithm selector cublasGemmAlgo_t algorithm; + /// Problem size as a GemmCoord + cutlass::gemm::GemmCoord problem_size; + /// A matrix operand - HostTensorA A; + HostMatrixA A; /// Layout of A matrix cublasOperation_t layout_A; /// B matrix operand - HostTensorB B; + HostMatrixB B; /// Layout of B matrix cublasOperation_t layout_B; /// C matrix operand - HostTensorC C_initial; + HostMatrixC C_initial; /// Reference result computed on the host - cutlass::HostTensor ref_host; + HostMatrixC ref_host; /// Reference result computed with cublas - HostTensorC ref_cublas; + HostMatrixC ref_cublas; /// Computed result - HostTensorC computed; + HostMatrixC computed; /// Linear scalaring factor Scalar alpha; @@ -208,36 +259,105 @@ struct GemmTestbed { /// Linear scaling factor Scalar beta; + /// batch count + int batch_count; + + /// distance between A[i] and A[i+1] for strided batched gemm + long long int batch_stride_A; + + /// distance between B[i] and B[i+1] for strided batched gemm + long long int batch_stride_B; + + /// distance between C[i] and C[i+1] for strided batched gemm + long long int batch_stride_C; + // // Static helpers // /// Helper to resize a matrix with a given size and layout - template - static void resize(cutlass::HostTensor& tensor, + template + static void 
resize(cutlass::HostMatrix& tensor, int rows, int columns, cublasOperation_t layout, int ldm = 0) { - if (!ldm) { - ldm = (layout == CUBLAS_OP_N ? rows : columns); - } - typedef cutlass::Coord::Rank> Coord_t; - - size_t matrix_stride = layout == CUBLAS_OP_N ? columns * ldm : rows * ldm; - // TODO: Remove that (int) cast. - Coord_t stride = cutlass::make_Coord( - (int)matrix_stride, layout == CUBLAS_OP_N ? 1 : ldm, layout == CUBLAS_OP_N ? ldm : 1, 1); - Coord_t size = cutlass::make_Coord(1, rows, columns, 1); - tensor.reset(stride, size); + tensor.resize(cutlass::make_Coord(rows, columns), convert(layout), ldm); } // // Methods // - /// Constructs a workspace for verifying GEMM. + /// Constructs a workspace for verifying GEMM, assumes + /// dense packing. + GemmTestbed(int M_, + int N_, + int K_, + cublasOperation_t layout_a, + cublasOperation_t layout_b, + Scalar alpha_ = Scalar(1), + Scalar beta_ = Scalar(0), + cublasGemmAlgo_t algorithm_ = CUBLAS_GEMM_DEFAULT, + cublasOperation_t layout_c = CUBLAS_OP_N) + : problem_size(K_, N_, M_, 1), + layout_A(layout_a), + layout_B(layout_b), + alpha(alpha_), + beta(beta_), + algorithm(algorithm_), + batch_count(1), + batch_stride_A(static_cast(0)), + batch_stride_B(static_cast(0)), + batch_stride_C(static_cast(0)) { + status = cublasCreate(&handle); + if (status != CUBLAS_STATUS_SUCCESS) { + throw cutlass::cuda_exception("Failed to create CUBLAS handle"); + } + + resize(A, M_, K_, layout_a); + resize(B, K_, N_, layout_b); + resize(C_initial, M_, N_, layout_c); + resize(ref_host, M_, N_, layout_c); + resize(ref_cublas, M_, N_, layout_c); + resize(computed, M_, N_, layout_c); + } + + /// Constructs a workspace for verifying GEMM, assumes + /// dense packing. 
+ GemmTestbed(cublasHandle_t handle_, + int M_, + int N_, + int K_, + cublasOperation_t layout_a, + cublasOperation_t layout_b, + Scalar alpha_ = Scalar(1), + Scalar beta_ = Scalar(0), + cublasGemmAlgo_t algorithm_ = CUBLAS_GEMM_DEFAULT, + cublasOperation_t layout_c = CUBLAS_OP_N) + : status(CUBLAS_STATUS_SUCCESS), + handle(handle_), + problem_size(K_, N_, M_, 1), + layout_A(layout_a), + layout_B(layout_b), + alpha(alpha_), + beta(beta_), + algorithm(algorithm_), + batch_count(1), + batch_stride_A(static_cast(0)), + batch_stride_B(static_cast(0)), + batch_stride_C(static_cast(0)) { + + resize(A, M_, K_ * batch_count, layout_a); + resize(B, K_ * batch_count, N_, layout_b); + resize(C_initial, M_, N_ * batch_count, layout_c); + resize(ref_host, M_, N_ * batch_count, layout_c); + resize(ref_cublas, M_, N_ * batch_count, layout_c); + resize(computed, M_, N_ * batch_count, layout_c); + } + + /// Constructs a workspace for verifying GEMM with arbitrary strides GemmTestbed(int M_, int N_, int K_, @@ -250,7 +370,16 @@ struct GemmTestbed { Scalar beta_ = Scalar(0), cublasGemmAlgo_t algorithm_ = CUBLAS_GEMM_DEFAULT, cublasOperation_t layout_c = CUBLAS_OP_N) - : layout_A(layout_a), layout_B(layout_b), alpha(alpha_), beta(beta_), algorithm(algorithm_) { + : problem_size(K_, N_, M_, 1), + layout_A(layout_a), + layout_B(layout_b), + alpha(alpha_), + beta(beta_), + algorithm(algorithm_), + batch_count(1), + batch_stride_A(static_cast(0)), + batch_stride_B(static_cast(0)), + batch_stride_C(static_cast(0)) { status = cublasCreate(&handle); if (status != CUBLAS_STATUS_SUCCESS) { throw cutlass::cuda_exception("Failed to create CUBLAS handle"); @@ -264,39 +393,119 @@ struct GemmTestbed { resize(computed, M_, N_, layout_c, ldc); } - ~GemmTestbed() { status = cublasDestroy(handle); } + /// Constructs a workspace for verifying GEMM with arbitrary strides + GemmTestbed(cublasHandle_t handle_, + int M_, + int N_, + int K_, + int ldc, + cublasOperation_t layout_a, + int lda, + 
cublasOperation_t layout_b, + int ldb, + Scalar alpha_ = Scalar(1), + Scalar beta_ = Scalar(0), + cublasGemmAlgo_t algorithm_ = CUBLAS_GEMM_DEFAULT, + cublasOperation_t layout_c = CUBLAS_OP_N) + : status(CUBLAS_STATUS_SUCCESS), + handle(handle_), + problem_size(K_, N_, M_, 1), + alpha(alpha_), + beta(beta_), + algorithm(algorithm_), + batch_count(1), + batch_stride_A(static_cast(0)), + batch_stride_B(static_cast(0)), + batch_stride_C(static_cast(0)) { + + resize(A, M_, K_ * batch_count, layout_a); + resize(B, K_ * batch_count, N_, layout_b); + resize(C_initial, M_, N_ * batch_count, layout_c); + resize(ref_host, M_, N_ * batch_count, layout_c); + resize(ref_cublas, M_, N_ * batch_count, layout_c); + resize(computed, M_, N_ * batch_count, layout_c); + } + + /// Constructs a workspace for verifying strided batched GEMM, assumes + /// dense packing. + /// batches are "concated" along K for matrix A and matrix B, and along N for matrix C + /// a full implementation of strided batched GEMM should handle other corner cases + GemmTestbed(int M_, + int N_, + int K_, + int batch_count_, + cublasOperation_t layout_a, + cublasOperation_t layout_b, + Scalar alpha_ = Scalar(1), + Scalar beta_ = Scalar(0), + cublasGemmAlgo_t algorithm_ = CUBLAS_GEMM_DEFAULT, + cublasOperation_t layout_c = CUBLAS_OP_N) + : problem_size(K_, N_, M_, batch_count_), + layout_A(layout_a), + layout_B(layout_b), + alpha(alpha_), + beta(beta_), + algorithm(algorithm_), + batch_count(batch_count_) { + + status = cublasCreate(&handle); + if (status != CUBLAS_STATUS_SUCCESS) { + throw cutlass::cuda_exception("Failed to create CUBLAS handle"); + } + + resize(A, M_, K_ * batch_count, layout_a); + resize(B, K_ * batch_count, N_, layout_b); + resize(C_initial, M_, N_ * batch_count, layout_c); + resize(ref_host, M_, N_ * batch_count, layout_c); + resize(ref_cublas, M_, N_ * batch_count, layout_c); + resize(computed, M_, N_ * batch_count, layout_c); + + batch_stride_A = (layout_a == CUBLAS_OP_N) ? 
M_ * K_ : K_; + batch_stride_B = (layout_b == CUBLAS_OP_N) ? K_ : K_ * N_; + batch_stride_C = M_ * N_; + } + + /// Destructs the GEMM testbed + ~GemmTestbed() { + if (status != CUBLAS_STATUS_NOT_INITIALIZED) { + status = cublasDestroy(handle); + } + } /// Returns true if the last CUBLAS call returned successfully bool good() const { return status == CUBLAS_STATUS_SUCCESS; } /// Returns a pointer to the A operand - typename HostTensorA::DeviceType* ptr_A() const { return A.device_data(); } + typename HostMatrixA::DeviceType* ptr_A() const { return A.device_data(); } /// Stride of A matrix - int lda() const { return std::max(A.stride(HostTensorA::Dim_H), A.stride(HostTensorA::Dim_W)); } + int lda() const { return A.leading_dim(); } /// Returns a pointer to the B operand - typename HostTensorB::DeviceType* ptr_B() const { return B.device_data(); } + typename HostMatrixB::DeviceType* ptr_B() const { return B.device_data(); } /// Stride of B matrix - int ldb() const { return std::max(B.stride(HostTensorB::Dim_H), B.stride(HostTensorB::Dim_W)); } + int ldb() const { return B.leading_dim(); } /// Returns a pointer to the initial state of the result tensor in device memory - typename HostTensorC::DeviceType* ptr_C_initial() const { return C_initial.device_data(); } + typename HostMatrixC::DeviceType* ptr_C_initial() const { return C_initial.device_data(); } /// Returns a pointer to the result tensor in device memory - typename HostTensorC::DeviceType* ptr_computed() const { return computed.device_data(); } + typename HostMatrixC::DeviceType* ptr_computed() const { return computed.device_data(); } /// Returns a pointer to the result tensor in device memory - typename HostTensorC::DeviceType* ptr_cublas() const { return ref_cublas.device_data(); } + typename HostMatrixC::DeviceType* ptr_cublas() const { return ref_cublas.device_data(); } /// Stride of C matrix int ldc() const { - return std::max(C_initial.stride(HostTensorC::Dim_H), C_initial.stride(HostTensorC::Dim_W)); + 
//return std::max(C_initial.stride(HostTensorC::Dim_H), C_initial.stride(HostTensorC::Dim_W)); + return C_initial.leading_dim(); } /// Returns the number of flops implied by the computation (1 multiply-accumulate = 2 flops) - uint64_t flops() const { return uint64_t(M()) * uint64_t(N()) * uint64_t(K()) * 2ULL; } + uint64_t flops() const { + return uint64_t(batch_count) * uint64_t(M()) * uint64_t(N()) * uint64_t(K()) * 2ULL; + } /// Computes the speed of the computation in GFLOPs/s double GFLOPs_per_sec(double runtime_ms) const { return double(flops()) / runtime_ms / 1.0e6; } @@ -307,53 +516,151 @@ struct GemmTestbed { /// Matrix layout of B cublasOperation_t layout_b() const { return layout_B; } - /// Number of rows of problem - int M() const { return C_initial.size(HostTensorC::Dim_H); } + /// Number of rows of problem, per batch; assumptions made here that we concat C by adding columns + int M() const { + return problem_size.m(); + } - /// Number of columns of problem - int N() const { return C_initial.size(HostTensorC::Dim_W); } + /// Number of columns of problem, per batch; assumptions made here that we concat C by adding + /// columns + int N() const { + return problem_size.n(); + } - /// Number of columns of problem - int K() const { return A.size(HostTensorA::Dim_W); } + /// Number of columns of problem, per batch; assumptions made here that we concat A by adding + /// columns + int K() const { + return problem_size.k(); + } + + /// Number of batches + int get_batch_count() const { + return problem_size.batch(); + } + + /// + long long int get_batch_stride_A() const { return batch_stride_A; } + + /// + long long int get_batch_stride_B() const { return batch_stride_B; } + + /// + long long int get_batch_stride_C() const { return batch_stride_C; } + + /// /// Initializes data, randomly void initialize(int seed = -1) { - A.fill_random(RandomGenerator(seed)); - B.fill_random(RandomGenerator(seed + 11)); - C_initial.fill_random(RandomGenerator(seed + 13)); + + // 
Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_uniform(-8, 8); + + cutlass::reference::host::TensorInitialize(A.host_view(), seed, dist); + cutlass::reference::host::TensorInitialize(B.host_view(), seed + 11, dist); + cutlass::reference::host::TensorInitialize(C_initial.host_view(), seed + 13, dist); + + A.sync_device(); + B.sync_device(); + C_initial.sync_device(); + } + + /// Initializes binary data + void initialize_binary(int seed = -1) { + //A.fill_random(RandomBitGenerator(seed)); + //B.fill_random(RandomBitGenerator(seed + 11)); + //C_initial.fill_random(RandomGenerator(seed + 13)); + A.fill_sequential(); + B.fill_sequential(); + C_initial.fill(0); + } + + /// Initializes integer data (sequential for now) + void initialize_integer(int seed =-1) { + A.fill_sequential(); + B.fill_sequential(); + C_initial.fill(0); } /// Computes the matrix product on the host void compute_host() { ref_host.fill(C_initial); - ref_host.template gemm(A, B, alpha, beta); + + cutlass::reference::host::Gemm(problem_size, alpha, A.host_ref(), B.host_ref(), beta, ref_host.host_ref(), Accumulator(0)); } /// Excutes an equivalent GEMM using cuBLAS bool execute_cublas() { - status = cublasGemmEx(handle, - layout_a(), - layout_b(), - M(), - N(), - K(), - &alpha, - ptr_A(), - cutlass::TypeTraits::cublas_type, - lda(), - ptr_B(), - cutlass::TypeTraits::cublas_type, - ldb(), - &beta, - ref_cublas.device_data(), - cutlass::TypeTraits::cublas_type, - ldc(), - cutlass::TypeTraits::cublas_type, - algorithm); + if (batch_count == 1) { + status = cublasGemmEx(handle, + layout_a(), + layout_b(), + M(), + N(), + K(), + &alpha, + ptr_A(), + cutlass::TypeTraits::cublas_type, + lda(), + ptr_B(), + cutlass::TypeTraits::cublas_type, + ldb(), + &beta, + ref_cublas.device_data(), + cutlass::TypeTraits::cublas_type, + ldc(), + cutlass::TypeTraits::cublas_type, + algorithm); - return status == CUBLAS_STATUS_SUCCESS; + return status == CUBLAS_STATUS_SUCCESS; 
+ } else { + // call strided batched gemm + status = cublasGemmStridedBatchedTemplate(handle, + layout_a(), + layout_b(), + M(), + N(), + K(), + &alpha, + ptr_A(), + lda(), + batch_stride_A, + ptr_B(), + ldb(), + batch_stride_B, + &beta, + ref_cublas.device_data(), + ldc(), + batch_stride_C, + batch_count); + + return status == CUBLAS_STATUS_SUCCESS; + } } + /// Helper function to use cublasGemmStridedBatched + cublasStatus_t cublasGemmStridedBatchedTemplate(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int M, + int N, + int K, + const Scalar *alpha, + const typename HostMatrixA::DeviceType *ptr_A, + int lda, + long long int stride_A, + const typename HostMatrixB::DeviceType *ptr_B, + int ldb, + long long int stride_B, + const Scalar *beta, + typename HostMatrixC::DeviceType *ptr_C, + int ldc, + long long int stride_C, + int batchCount) { + return CUBLAS_STATUS_NOT_SUPPORTED; + } + + /// Computes the matrix product using cuBLAS void compute_cublas() { ref_cublas.fill(C_initial); @@ -374,8 +681,11 @@ struct GemmTestbed { << (layout_b() == CUBLAS_OP_N ? 
"n" : "t") << "_" << typeid(AType).name() << "_" << typeid(BType).name() << "_" << typeid(CType).name() << "_" << typeid(Accumulator).name() << "_" << typeid(Scalar).name() << "_" << M() << "x" << N() << "x" << K(); - - return ss.str(); + //make sure there is no space in the ss + std::string thisString = ss.str(); + std::replace(thisString.begin(), thisString.end(), ' ', '_'); + std::replace(thisString.begin(), thisString.end(), ':', '_'); + return thisString; } /// Writes the workspace to an ostream @@ -389,8 +699,8 @@ struct GemmTestbed { /// Outputs each mismatching element std::ostream& write_errors(std::ostream& out, - cutlass::HostTensorView const& experimental, - cutlass::HostTensorView const& ref) const { + HostMatrixC const& experimental, + HostMatrixC const& ref) const { PrintErrors printer(out, ref, experimental); computed.visit(printer); @@ -419,8 +729,8 @@ struct GemmTestbed { } /// Saves the workspace to files - void save_workspace(cutlass::HostTensorView const& experimental, - cutlass::HostTensorView const& ref) { + void save_workspace(HostMatrixC const& experimental, + HostMatrixC const& ref) { std::string name = workspace_name(); std::string results_name = name + "_results.txt"; @@ -453,6 +763,7 @@ struct GemmTestbed { ref_cublas.sync_host(); computed.sync_host(); + bool passed = computed.bit_equals(ref_cublas); if ((!passed && save_on_error) || always_print) { @@ -494,22 +805,116 @@ struct GemmTestbed { bool has_cublas_support() const { return cutlass::platform::is_same::value; } }; +// +//specialization for cublasGemmStridedBatchedTemplate +template<> inline cublasStatus_t GemmTestbed::cublasGemmStridedBatchedTemplate(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int M, + int N, + int K, + const float *alpha, + const float *ptr_A, + int lda, + long long int stride_A, + const float *ptr_B, + int ldb, + long long int stride_B, + const float *beta, + float *ptr_C, + int ldc, + long long int stride_C, + int 
batchCount) { + return cublasSgemmStridedBatched(handle, + transa, + transb, + M, N, K, + alpha, + ptr_A, + lda, + stride_A, + ptr_B, + ldb, + stride_B, + beta, + ptr_C, + ldc, + stride_C, + batchCount); +} + +template<> inline cublasStatus_t GemmTestbed::cublasGemmStridedBatchedTemplate(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int M, + int N, + int K, + const double *alpha, + const double *ptr_A, + int lda, + long long int stride_A, + const double *ptr_B, + int ldb, + long long int stride_B, + const double *beta, + double *ptr_C, + int ldc, + long long int stride_C, + int batchCount) { + return cublasDgemmStridedBatched(handle, + transa, + transb, + M, N, K, + alpha, + ptr_A, + lda, + stride_A, + ptr_B, + ldb, + stride_B, + beta, + ptr_C, + ldc, + stride_C, + batchCount); +} + +template<> inline cublasStatus_t GemmTestbed::cublasGemmStridedBatchedTemplate(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int M, + int N, + int K, + const cutlass::half_t *alpha, + const half *ptr_A, + int lda, + long long int stride_A, + const half *ptr_B, + int ldb, + long long int stride_B, + const cutlass::half_t *beta, + half *ptr_C, + int ldc, + long long int stride_C, + int batchCount) { + half temp_alpha = alpha->operator half(); + half temp_beta = beta->operator half(); + return cublasHgemmStridedBatched(handle, + transa, + transb, + M, N, K, + &temp_alpha, + ptr_A, + lda, + stride_A, + ptr_B, + ldb, + stride_B, + &temp_beta, + ptr_C, + ldc, + stride_C, + batchCount); +} + } // namespace test - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace cutlass { -inline cublasOperation_t convert(cutlass::MatrixLayout::Kind layout) { - switch (layout) { - case cutlass::MatrixLayout::kRowMajor: - return CUBLAS_OP_T; - case cutlass::MatrixLayout::kColumnMajor: - return CUBLAS_OP_N; - default: - break; - } - return CUBLAS_OP_N; -} - 
-//////////////////////////////////////////////////////////////////////////////////////////////////// -} diff --git a/tools/test/unit/gemm/hgemm_128x128x16.cu b/tools/test/unit/gemm/hgemm_128x128x16.cu index 1d72971d2..7715cf836 100644 --- a/tools/test/unit/gemm/hgemm_128x128x16.cu +++ b/tools/test/unit/gemm/hgemm_128x128x16.cu @@ -22,30 +22,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include -#include - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -TEST(Hgemm_128x128x16, hgemm_2x2x2_nt) { - typedef cutlass::gemm::HgemmTraits > - HgemmTraits; - run_gemm(2, 2, 2); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -TEST(Hgemm_128x128x16, hgemm_128x128x8_nt) { - typedef cutlass::gemm::HgemmTraits > - HgemmTraits; - run_gemm(128, 128, 8); -} +#include "cutlass_unit_test.h" +#include "tools/util/half.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/hgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/hgemm_128x128x8.cu b/tools/test/unit/gemm/hgemm_128x128x8.cu index 266cce8a1..341ae2e95 100644 --- a/tools/test/unit/gemm/hgemm_128x128x8.cu +++ b/tools/test/unit/gemm/hgemm_128x128x8.cu @@ -22,12 +22,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "tools/util/half.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/hgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/hgemm_128x32x8.cu b/tools/test/unit/gemm/hgemm_128x32x8.cu index 557415ea6..c9ab620ea 100644 --- a/tools/test/unit/gemm/hgemm_128x32x8.cu +++ b/tools/test/unit/gemm/hgemm_128x32x8.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/hgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/hgemm_128x64x8.cu b/tools/test/unit/gemm/hgemm_128x64x8.cu index ea4968b4e..e1b1540a3 100644 --- a/tools/test/unit/gemm/hgemm_128x64x8.cu +++ b/tools/test/unit/gemm/hgemm_128x64x8.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/hgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/igemm_128x128x32.cu b/tools/test/unit/gemm/igemm_128x128x32.cu index aad3d4929..6c891a45c 100644 --- a/tools/test/unit/gemm/igemm_128x128x32.cu +++ b/tools/test/unit/gemm/igemm_128x128x32.cu @@ -22,16 +22,17 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/igemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Igemm_128x128x32, igemm_128x128x4_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 4); @@ -41,7 +42,7 @@ TEST(Igemm_128x128x32, igemm_128x128x4_nt) { TEST(Igemm_128x128x32, igemm_128x128x32_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 32); @@ -51,7 +52,7 @@ TEST(Igemm_128x128x32, igemm_128x128x32_nt) { TEST(Igemm_128x128x32, igemm_128x128x36_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 36); @@ -61,7 +62,7 @@ TEST(Igemm_128x128x32, igemm_128x128x36_nt) { TEST(Igemm_128x128x32, igemm_128x128x64_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; 
run_gemm(128, 128, 64); @@ -71,7 +72,7 @@ TEST(Igemm_128x128x32, igemm_128x128x64_nt) { TEST(Igemm_128x128x32, igemm_128x128x256_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 256); @@ -81,7 +82,7 @@ TEST(Igemm_128x128x32, igemm_128x128x256_nt) { TEST(Igemm_128x128x32, igemm_256x128x64_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(256, 128, 64); @@ -91,7 +92,7 @@ TEST(Igemm_128x128x32, igemm_256x128x64_nt) { TEST(Igemm_128x128x32, igemm_128x256x64_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 256, 64); @@ -101,7 +102,7 @@ TEST(Igemm_128x128x32, igemm_128x256x64_nt) { TEST(Igemm_128x128x32, igemm_256x256x64_nt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(256, 256, 64); @@ -189,6 +190,12 @@ TEST(Igemm_128x128x32, igemm_256x256x64_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// +// NB: I have removed tests in which k=1. These result in the test environment definining matrices +// in which ld{a,b} = 1 which cannot be launched by cuBLAS. +// +// This problem size remains untested. 
--akerr +// + TEST(Igemm_128x128x32, igemm_128x128x4_tn) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 32); @@ -291,7 +298,7 @@ TEST(Igemm_128x128x32, igemm_128x128x32_tt) { TEST(Igemm_128x128x32, igemm_128x128x36_tt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 36); @@ -301,7 +308,7 @@ TEST(Igemm_128x128x32, igemm_128x128x36_tt) { TEST(Igemm_128x128x32, igemm_128x128x64_tt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 64); @@ -311,7 +318,7 @@ TEST(Igemm_128x128x32, igemm_128x128x64_tt) { TEST(Igemm_128x128x32, igemm_128x128x256_tt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 128, 256); @@ -321,7 +328,7 @@ TEST(Igemm_128x128x32, igemm_128x128x256_tt) { TEST(Igemm_128x128x32, igemm_256x128x64_tt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(256, 128, 64); @@ -331,7 +338,7 @@ TEST(Igemm_128x128x32, igemm_256x128x64_tt) { TEST(Igemm_128x128x32, igemm_128x256x64_tt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(128, 256, 64); @@ -341,7 +348,7 @@ TEST(Igemm_128x128x32, igemm_128x256x64_tt) { TEST(Igemm_128x128x32, igemm_256x256x64_tt) { typedef cutlass::gemm::IgemmTraits, int, cutlass::gemm::LinearScaling > IgemmTraits; run_gemm(256, 256, 64); diff --git a/tools/test/unit/gemm/igemm_128x128x32_float.cu b/tools/test/unit/gemm/igemm_128x128x32_float.cu index 9367e5d37..08b7dbff2 100644 --- a/tools/test/unit/gemm/igemm_128x128x32_float.cu +++ b/tools/test/unit/gemm/igemm_128x128x32_float.cu @@ -22,17 +22,17 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/igemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Igemm_128x128x32_float, igemm_128x128x4_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 4); @@ -42,7 +42,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x4_nt) { TEST(Igemm_128x128x32_float, igemm_128x128x32_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 32); @@ -52,7 +52,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x32_nt) { TEST(Igemm_128x128x32_float, igemm_128x128x36_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 36); @@ -62,7 +62,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x36_nt) { TEST(Igemm_128x128x32_float, igemm_128x128x64_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 64); @@ -72,7 +72,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x64_nt) { TEST(Igemm_128x128x32_float, igemm_128x128x256_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 256); @@ -82,7 +82,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x256_nt) { TEST(Igemm_128x128x32_float, igemm_256x128x64_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(256, 128, 64); @@ -92,7 +92,7 @@ TEST(Igemm_128x128x32_float, igemm_256x128x64_nt) { TEST(Igemm_128x128x32_float, igemm_128x256x64_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 256, 64); @@ -102,7 +102,7 @@ TEST(Igemm_128x128x32_float, igemm_128x256x64_nt) { TEST(Igemm_128x128x32_float, igemm_256x256x64_nt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(256, 256, 64); @@ 
-190,6 +190,12 @@ TEST(Igemm_128x128x32_float, igemm_256x256x64_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// +// NB: I have removed tests in which k=1. These result in the test environment definining matrices +// in which ld{a,b} = 1 which cannot be launched by cuBLAS. +// +// This problem size remains untested. --akerr +// + TEST(Igemm_128x128x32_float, igemm_128x128x4_tn) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 32); @@ -292,7 +298,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x32_tt) { TEST(Igemm_128x128x32_float, igemm_128x128x36_tt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 36); @@ -302,7 +308,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x36_tt) { TEST(Igemm_128x128x32_float, igemm_128x128x64_tt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 64); @@ -312,7 +318,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x64_tt) { TEST(Igemm_128x128x32_float, igemm_128x128x256_tt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 128, 256); @@ -322,7 +328,7 @@ TEST(Igemm_128x128x32_float, igemm_128x128x256_tt) { TEST(Igemm_128x128x32_float, igemm_256x128x64_tt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(256, 128, 64); @@ -332,7 +338,7 @@ TEST(Igemm_128x128x32_float, igemm_256x128x64_tt) { TEST(Igemm_128x128x32_float, igemm_128x256x64_tt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; run_gemm(128, 256, 64); @@ -342,7 +348,7 @@ TEST(Igemm_128x128x32_float, igemm_128x256x64_tt) { TEST(Igemm_128x128x32_float, igemm_256x256x64_tt) { typedef cutlass::gemm::IgemmTraits, float> IgemmTraits; diff --git a/tools/test/unit/gemm/igemm_128x128x32_int8.cu b/tools/test/unit/gemm/igemm_128x128x32_int8.cu index bf053a6b5..fbf5ca406 100644 --- a/tools/test/unit/gemm/igemm_128x128x32_int8.cu +++ b/tools/test/unit/gemm/igemm_128x128x32_int8.cu @@ -22,17 +22,17 @@ * OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/igemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Igemm_128x128x32_int8, igemm_128x128x4_nt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 4); @@ -42,7 +42,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x4_nt) { TEST(Igemm_128x128x32_int8, igemm_128x128x32_nt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 32); @@ -52,7 +52,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x32_nt) { TEST(Igemm_128x128x32_int8, igemm_128x128x36_nt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 36); @@ -62,7 +62,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x36_nt) { TEST(Igemm_128x128x32_int8, igemm_128x128x64_nt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 64); @@ -72,7 +72,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x64_nt) { TEST(Igemm_128x128x32_int8, igemm_128x128x256_nt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 256); @@ -82,7 +82,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x256_nt) { TEST(Igemm_128x128x32_int8, igemm_256x128x64_nt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(256, 128, 64); @@ -92,7 +92,7 @@ TEST(Igemm_128x128x32_int8, igemm_256x128x64_nt) { TEST(Igemm_128x128x32_int8, igemm_128x256x64_nt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 256, 64); @@ -102,7 +102,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x256x64_nt) { TEST(Igemm_128x128x32_int8, igemm_256x256x64_nt) { typedef 
cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(256, 256, 64); @@ -190,6 +190,14 @@ TEST(Igemm_128x128x32_int8, igemm_256x256x64_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// +// NB: I have removed tests in which k=1. These result in the test environment definining matrices +// in which ld{a,b} = 1 which cannot be launched by cuBLAS. +// +// This problem size remains untested. --akerr +// + +//////////////////////////////////////////////////////////////////////////////////////////////////// + TEST(Igemm_128x128x32_int8, igemm_128x128x4_tn) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 32); @@ -292,7 +300,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x32_tt) { TEST(Igemm_128x128x32_int8, igemm_128x128x36_tt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 36); @@ -302,7 +310,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x36_tt) { TEST(Igemm_128x128x32_int8, igemm_128x128x64_tt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 64); @@ -312,7 +320,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x64_tt) { TEST(Igemm_128x128x32_int8, igemm_128x128x256_tt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 128, 256); @@ -322,7 +330,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x128x256_tt) { TEST(Igemm_128x128x32_int8, igemm_256x128x64_tt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(256, 128, 64); @@ -332,7 +340,7 @@ TEST(Igemm_128x128x32_int8, igemm_256x128x64_tt) { TEST(Igemm_128x128x32_int8, igemm_128x256x64_tt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(128, 256, 64); @@ -342,7 +350,7 @@ TEST(Igemm_128x128x32_int8, igemm_128x256x64_tt) { TEST(Igemm_128x128x32_int8, igemm_256x256x64_tt) { typedef cutlass::gemm::IgemmTraits, int8_t> IgemmTraits; run_gemm(256, 256, 64); diff --git a/tools/test/unit/gemm/igemm_128x32x32.cu 
b/tools/test/unit/gemm/igemm_128x32x32.cu index 448d8f03c..dabeb07df 100644 --- a/tools/test/unit/gemm/igemm_128x32x32.cu +++ b/tools/test/unit/gemm/igemm_128x32x32.cu @@ -22,17 +22,17 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/igemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Igemm_128x32x32, igemm_128x32x32x4_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 4); @@ -42,7 +42,7 @@ TEST(Igemm_128x32x32, igemm_128x32x32x4_nt) { TEST(Igemm_128x32x32, igemm_128x32x32_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 20); @@ -52,7 +52,7 @@ TEST(Igemm_128x32x32, igemm_128x32x32_nt) { TEST(Igemm_128x32x32, igemm_128x32x36_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 36); @@ -62,7 +62,7 @@ TEST(Igemm_128x32x32, igemm_128x32x36_nt) { TEST(Igemm_128x32x32, igemm_128x32x64_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 64); @@ -72,7 +72,7 @@ TEST(Igemm_128x32x32, igemm_128x32x64_nt) { TEST(Igemm_128x32x32, igemm_128x32x256_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 256); @@ -82,7 +82,7 @@ TEST(Igemm_128x32x32, igemm_128x32x256_nt) { TEST(Igemm_128x32x32, igemm_256x32x64_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(256, 32, 64); @@ -92,7 +92,7 @@ TEST(Igemm_128x32x32, igemm_256x32x64_nt) { TEST(Igemm_128x32x32, igemm_128x128x32_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 128, 32); @@ -102,7 +102,7 @@ TEST(Igemm_128x32x32, igemm_128x128x32_nt) { 
TEST(Igemm_128x32x32, igemm_256x128x32_nt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(256, 128, 32); @@ -190,6 +190,15 @@ TEST(Igemm_128x32x32, igemm_256x128x32_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// +// FIXME: This test fails in cuBLAS. I don't know why!!! +// TEST(Igemm_128x32x32, igemm_128x32x1_tn) { +// typedef cutlass::gemm::IgemmTraits > IgemmTraits; +// run_gemm(128, 32, 1); +// } + +//////////////////////////////////////////////////////////////////////////////////////////////////// + TEST(Igemm_128x32x32, igemm_128x32x4_tn) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 32); @@ -292,7 +301,7 @@ TEST(Igemm_128x32x32, igemm_128x32x32_tt) { TEST(Igemm_128x32x32, igemm_128x32x36_tt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 36); @@ -302,7 +311,7 @@ TEST(Igemm_128x32x32, igemm_128x32x36_tt) { TEST(Igemm_128x32x32, igemm_128x32x64_tt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 64); @@ -312,7 +321,7 @@ TEST(Igemm_128x32x32, igemm_128x32x64_tt) { TEST(Igemm_128x32x32, igemm_128x32x256_tt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 32, 256); @@ -322,7 +331,7 @@ TEST(Igemm_128x32x32, igemm_128x32x256_tt) { TEST(Igemm_128x32x32, igemm_256x32x64_tt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(256, 32, 64); @@ -332,7 +341,7 @@ TEST(Igemm_128x32x32, igemm_256x32x64_tt) { TEST(Igemm_128x32x32, igemm_128x128x32_tt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(128, 128, 32); @@ -342,7 +351,7 @@ TEST(Igemm_128x32x32, igemm_128x128x32_tt) { TEST(Igemm_128x32x32, igemm_256x128x32_tt) { typedef cutlass::gemm::IgemmTraits, int> IgemmTraits; run_gemm(256, 128, 32); diff --git a/tools/test/unit/gemm/igemm_128x64x32.cu b/tools/test/unit/gemm/igemm_128x64x32.cu index 9e080ac40..279daafec 100644 --- a/tools/test/unit/gemm/igemm_128x64x32.cu 
+++ b/tools/test/unit/gemm/igemm_128x64x32.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/igemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -190,6 +190,15 @@ TEST(Igemm_128x64x32, igemm_256x128x64_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// +// FIXME: This test fails in cuBLAS. I don't know why!!! +// TEST(Igemm_128x64x32, igemm_128x64x1_tn) { +// typedef cutlass::gemm::IgemmTraits > IgemmTraits; +// run_gemm(128, 64, 1); +// } + +//////////////////////////////////////////////////////////////////////////////////////////////////// + TEST(Igemm_128x64x32, igemm_128x64x4_tn) { typedef cutlass::gemm::IgemmTraits -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/igemm_traits.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/gemm.h b/tools/test/unit/gemm/integer_gemm.h similarity index 53% rename from tools/test/unit/gemm/gemm.h rename to tools/test/unit/gemm/integer_gemm.h index 78cdbd11b..18925336b 100644 --- a/tools/test/unit/gemm/gemm.h +++ b/tools/test/unit/gemm/integer_gemm.h @@ -23,57 +23,54 @@ * **************************************************************************************************/ -#include -#include +#include "cutlass/cutlass.h" //////////////////////////////////////////////////////////////////////////////////////////////////// +/// Helper Function to 
get the number of elements in the scalar. +template +unsigned getElementsPerScalar() { return 1; } + +template<> +unsigned getElementsPerScalar >() { return 8; } + +template<> +unsigned getElementsPerScalar >() { return 8; } + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Function to run GEMM for integer operands template -static void run_gemm( - int m, - int n, - int k, - int lda, - int ldb, - int ldc, - typename test::GemmTestbedTraits::host_type alpha = - typename test::GemmTestbedTraits::host_type(1), - typename test::GemmTestbedTraits::host_type beta = - typename test::GemmTestbedTraits::host_type(0)) { +static void run_integer_gemm(int m, int n, int k, int alpha = 1, int beta = 1) { typedef cutlass::gemm::Gemm Gemm; typename Gemm::Params params; - test::GemmTestbed< - typename test::GemmTestbedTraits< - typename GemmTraits_::GemmConfig::ScalarA>::host_type, // AType - typename test::GemmTestbedTraits< - typename GemmTraits_::GemmConfig::ScalarB>::host_type, // BType - typename test::GemmTestbedTraits< - typename GemmTraits_::Epilogue::ScalarC>::host_type, // CType - typename test::GemmTestbedTraits< - typename GemmTraits_::Epilogue::Accumulators::Element>::host_type, // Accumulator - typename test::GemmTestbedTraits::host_type // Scalar - > + unsigned const elementsPerScalar = + getElementsPerScalar(); + + test::GemmTestbed testbed(m, n, - k, - lda, - ldb, - ldc, - cutlass::convert(GemmTraits_::kLayoutA), - cutlass::convert(GemmTraits_::kLayoutB), + k / elementsPerScalar, + test::convert(GemmTraits_::kLayoutA), + test::convert(GemmTraits_::kLayoutB), alpha, beta); - testbed.initialize(); + // Initializes the input vectors for computation FIXME + testbed.initialize_integer(); - if (testbed.has_cublas_support()) { - EXPECT_TRUE(testbed.verify_host_with_cublas()); - } + // Compute the reference result on the host (CPU) + testbed.compute_host(); params.initialize(testbed.M(), testbed.N(), - 
testbed.K(), + testbed.K() * elementsPerScalar, testbed.alpha, testbed.ptr_A(), testbed.lda(), @@ -91,28 +88,8 @@ static void run_gemm( ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) << "\n"; - if (testbed.has_cublas_support()) { - ASSERT_TRUE(testbed.verify_with_cublas()); - } else { - ASSERT_TRUE(testbed.verify_with_host()); - } + testbed.computed.sync_host(); + + // Check the results + ASSERT_TRUE(testbed.computed.bit_equals(testbed.ref_host)); } - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -static void run_gemm( - int m, - int n, - int k, - typename test::GemmTestbedTraits::host_type alpha = - typename test::GemmTestbedTraits::host_type(1), - typename test::GemmTestbedTraits::host_type beta = - typename test::GemmTestbedTraits::host_type(0)) { - int lda = GemmTraits_::kLayoutA == cutlass::MatrixLayout::kColumnMajor ? m : k; - int ldb = GemmTraits_::kLayoutB == cutlass::MatrixLayout::kColumnMajor ? k : n; - - run_gemm(m, n, k, lda, ldb, m, alpha, beta); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/run_gemm.h b/tools/test/unit/gemm/run_gemm.h new file mode 100644 index 000000000..debdca5ef --- /dev/null +++ b/tools/test/unit/gemm/run_gemm.h @@ -0,0 +1,244 @@ +/*************************************************************************************************** +* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +* +* Redistribution and use in source and binary forms, with or without modification, are permitted +* provided that the following conditions are met: +* * Redistributions of source code must retain the above copyright notice, this list of +* conditions and the following disclaimer. 
+* * Redistributions in binary form must reproduce the above copyright notice, this list of +* conditions and the following disclaimer in the documentation and/or other materials +* provided with the distribution. +* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +* to endorse or promote products derived from this software without specific prior written +* permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +* STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +**************************************************************************************************/ + +#pragma once + +#include "cutlass/cutlass.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +template +static void run_gemm( + int m, + int n, + int k, + int lda, + int ldb, + int ldc, + typename test::GemmTestbedTraits::host_type alpha = + typename test::GemmTestbedTraits::host_type(1), + typename test::GemmTestbedTraits::host_type beta = + typename test::GemmTestbedTraits::host_type(0)) { + + typedef typename GemmTraits_::KernelClass Gemm; + typename Gemm::Params params; + + test::GemmTestbed< + typename test::GemmTestbedTraits< + typename GemmTraits_::GemmConfig::ScalarA>::host_type, // AType + typename test::GemmTestbedTraits< + typename GemmTraits_::GemmConfig::ScalarB>::host_type, // BType + typename test::GemmTestbedTraits< + typename GemmTraits_::Epilogue::ScalarC>::host_type, // CType + typename test::GemmTestbedTraits< + typename GemmTraits_::Epilogue::Accumulators::Element>::host_type, // Accumulator + typename test::GemmTestbedTraits::host_type // Scalar + > + testbed(m, + n, + k, + lda, + ldb, + ldc, + test::convert(GemmTraits_::kLayoutA), + test::convert(GemmTraits_::kLayoutB), + alpha, + beta); + + testbed.initialize(); + + if (testbed.has_cublas_support()) { + EXPECT_TRUE(testbed.verify_host_with_cublas()); + } + + params.initialize(testbed.M(), + testbed.N(), + testbed.K(), + testbed.alpha, + testbed.ptr_A(), + testbed.lda(), + testbed.ptr_B(), + testbed.ldb(), + testbed.beta, + testbed.ptr_C_initial(), + testbed.ldc(), + testbed.ptr_computed(), + testbed.ldc()); + + Gemm::launch(params); + + cudaError_t result = cudaDeviceSynchronize(); + ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) + << "\n"; + + if (testbed.has_cublas_support()) { + ASSERT_TRUE(testbed.verify_with_cublas()); + } else { + ASSERT_TRUE(testbed.verify_with_host()); + } +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +static void run_gemm( + int m, + int n, + int k, + typename test::GemmTestbedTraits::host_type alpha = + typename test::GemmTestbedTraits::host_type(1.0f), + typename test::GemmTestbedTraits::host_type beta = + typename test::GemmTestbedTraits::host_type(0.0f)) { + //typedef cutlass::gemm::Gemm Gemm; + typedef typename GemmTraits_::KernelClass Gemm; + typename Gemm::Params params; + + typedef test::GemmTestbed< + typename test::GemmTestbedTraits< + typename GemmTraits_::GemmConfig::ScalarA>::host_type, // AType + typename test::GemmTestbedTraits< + typename GemmTraits_::GemmConfig::ScalarB>::host_type, // BType + typename test::GemmTestbedTraits< + typename GemmTraits_::Epilogue::ScalarC>::host_type, // CType + typename test::GemmTestbedTraits< + typename GemmTraits_::Epilogue::Accumulators::Element>::host_type, // Accumulator + typename test::GemmTestbedTraits::host_type // Scalar + > GemmTestbed; + + GemmTestbed testbed(m, + n, + k, + test::convert(GemmTraits_::kLayoutA), + test::convert(GemmTraits_::kLayoutB), + alpha, + beta); + + testbed.initialize(); + + if (testbed.has_cublas_support()) { + EXPECT_TRUE(testbed.verify_host_with_cublas()); + } + + params.initialize(testbed.M(), + testbed.N(), + testbed.K(), + testbed.alpha, + testbed.ptr_A(), + testbed.lda(), + testbed.ptr_B(), + testbed.ldb(), + testbed.beta, + testbed.ptr_C_initial(), + testbed.ldc(), + testbed.ptr_computed(), + testbed.ldc()); + + Gemm::launch(params); + + cudaError_t result = cudaDeviceSynchronize(); + ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) + << "\n"; + + if (testbed.has_cublas_support()) { + ASSERT_TRUE(testbed.verify_with_cublas()); + } else { + ASSERT_TRUE(testbed.verify_with_host()); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +static void 
run_batched_strided_gemm( + int m, + int n, + int k, + int batch_count, + typename test::GemmTestbedTraits::host_type alpha = + typename test::GemmTestbedTraits::host_type(1), + typename test::GemmTestbedTraits::host_type beta = + typename test::GemmTestbedTraits::host_type(0)) { + //typedef cutlass::gemm::Gemm Gemm; + typedef typename GemmTraits_::KernelClass Gemm; + typename Gemm::Params params; + test::GemmTestbed< + typename test::GemmTestbedTraits< + typename GemmTraits_::GemmConfig::ScalarA>::host_type, // AType + typename test::GemmTestbedTraits< + typename GemmTraits_::GemmConfig::ScalarB>::host_type, // BType + typename test::GemmTestbedTraits< + typename GemmTraits_::Epilogue::ScalarC>::host_type, // CType + typename test::GemmTestbedTraits< + typename GemmTraits_::Epilogue::Accumulators::Element>::host_type, // Accumulator + typename test::GemmTestbedTraits::host_type // Scalar + > + testbed(m, + n, + k, + batch_count, + test::convert(GemmTraits_::kLayoutA), + test::convert(GemmTraits_::kLayoutB), + alpha, + beta); + + testbed.initialize(); + + // host support is not implemented for strided batched gemm + // if (testbed.has_cublas_support()) { + // EXPECT_TRUE(testbed.verify_host_with_cublas()); + //} + + params.initialize(testbed.M(), + testbed.N(), + testbed.K(), + testbed.alpha, + testbed.ptr_A(), + testbed.lda(), + testbed.get_batch_stride_A(), + testbed.ptr_B(), + testbed.ldb(), + testbed.get_batch_stride_B(), + testbed.beta, + testbed.ptr_C_initial(), + testbed.ldc(), + testbed.get_batch_stride_C(), + testbed.ptr_computed(), + testbed.ldc(), + testbed.get_batch_stride_C(), + testbed.get_batch_count()); + + Gemm::launch(params); + + cudaError_t result = cudaDeviceSynchronize(); + ASSERT_EQ(result, cudaSuccess) << "\nCUDA kernel launch error: " << cudaGetErrorString(result) + << "\n"; + + if (testbed.has_cublas_support()) { + ASSERT_TRUE(testbed.verify_with_cublas()); + } else { + // ASSERT_TRUE(testbed.verify_with_host()); + ASSERT_TRUE(false) << 
"host support is not implemented for strided batched gemm" << std::endl; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_128x128x16.cu b/tools/test/unit/gemm/sgemm_128x128x16.cu index 234a2d976..40e49980d 100644 --- a/tools/test/unit/gemm/sgemm_128x128x16.cu +++ b/tools/test/unit/gemm/sgemm_128x128x16.cu @@ -22,16 +22,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x128x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 16); @@ -40,7 +40,7 @@ TEST(Sgemm_128x128x16, sgemm_128x128x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x81x1_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 81, 1); @@ -49,7 +49,7 @@ TEST(Sgemm_128x128x16, sgemm_128x81x1_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 112, 16); @@ -58,7 +58,7 @@ TEST(Sgemm_128x128x16, sgemm_128x112x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x17_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 112, 17); @@ -67,7 +67,7 @@ TEST(Sgemm_128x128x16, sgemm_128x112x17_nt) { 
//////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x73x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 73, 16); @@ -76,7 +76,7 @@ TEST(Sgemm_128x128x16, sgemm_128x73x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_97x112x64_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(97, 112, 64); @@ -85,7 +85,7 @@ TEST(Sgemm_128x128x16, sgemm_97x112x64_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_256x112x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 112, 16); @@ -94,7 +94,7 @@ TEST(Sgemm_128x128x16, sgemm_256x112x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x240x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 240, 16); @@ -103,7 +103,7 @@ TEST(Sgemm_128x128x16, sgemm_128x240x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_256x240x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 240, 16); @@ -112,7 +112,7 @@ TEST(Sgemm_128x128x16, sgemm_256x240x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x128x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 16); @@ -121,7 +121,7 @@ TEST(Sgemm_128x128x16, sgemm_128x128x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x1_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -131,7 +131,7 @@ TEST(Sgemm_128x128x16, sgemm_128x112x1_nn) { 
//////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_79x112x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -141,7 +141,7 @@ TEST(Sgemm_128x128x16, sgemm_79x112x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x81x17_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -151,7 +151,7 @@ TEST(Sgemm_128x128x16, sgemm_128x81x17_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -161,7 +161,7 @@ TEST(Sgemm_128x128x16, sgemm_128x112x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x73x64_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -171,7 +171,7 @@ TEST(Sgemm_128x128x16, sgemm_128x73x64_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_256x112x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -181,7 +181,7 @@ TEST(Sgemm_128x128x16, sgemm_256x112x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x256x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -191,7 +191,7 @@ TEST(Sgemm_128x128x16, sgemm_128x256x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_256x256x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -201,7 +201,7 @@ TEST(Sgemm_128x128x16, sgemm_256x256x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x128x16_tn) { - 
typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 16); @@ -210,7 +210,7 @@ TEST(Sgemm_128x128x16, sgemm_128x128x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x128x1_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 1); } @@ -218,7 +218,7 @@ TEST(Sgemm_128x128x16, sgemm_128x128x1_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_127x112x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -228,7 +228,7 @@ TEST(Sgemm_128x128x16, sgemm_127x112x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_21x112x17_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -238,7 +238,7 @@ TEST(Sgemm_128x128x16, sgemm_21x112x17_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x73x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -248,7 +248,7 @@ TEST(Sgemm_128x128x16, sgemm_128x73x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x81x64_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -258,7 +258,7 @@ TEST(Sgemm_128x128x16, sgemm_128x81x64_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_256x112x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -268,7 +268,7 @@ TEST(Sgemm_128x128x16, sgemm_256x112x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_47x256x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -278,7 +278,7 @@ TEST(Sgemm_128x128x16, 
sgemm_47x256x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_211x256x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -288,7 +288,7 @@ TEST(Sgemm_128x128x16, sgemm_211x256x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x128x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 16); @@ -297,7 +297,7 @@ TEST(Sgemm_128x128x16, sgemm_128x128x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x128x1_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 1); } @@ -305,7 +305,7 @@ TEST(Sgemm_128x128x16, sgemm_128x128x1_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_109x112x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(109, 112, 16); @@ -314,7 +314,7 @@ TEST(Sgemm_128x128x16, sgemm_109x112x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x17_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 112, 17); @@ -323,7 +323,7 @@ TEST(Sgemm_128x128x16, sgemm_128x112x17_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 112, 16); @@ -332,7 +332,7 @@ TEST(Sgemm_128x128x16, sgemm_128x112x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_123x112x64_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(123, 112, 64); @@ -341,7 +341,7 @@ TEST(Sgemm_128x128x16, 
sgemm_123x112x64_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_256x112x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 112, 16); @@ -350,7 +350,7 @@ TEST(Sgemm_128x128x16, sgemm_256x112x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x256x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 256, 16); @@ -359,7 +359,7 @@ TEST(Sgemm_128x128x16, sgemm_128x256x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_256x256x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 256, 16); @@ -369,13 +369,12 @@ TEST(Sgemm_128x128x16, sgemm_256x256x16_tt) { TEST(Sgemm_128x128x16, sgemm_120x112x64_ldg4_nt) { // Load 4 floats per LDG for A/B. - typedef cutlass::gemm::SgemmTraits, cutlass::gemm::LinearScaling, cutlass::Shape<8, 8, 8>, - 4, - 4> + 4, 4> SgemmTraits; run_gemm(120, 112, 64); } @@ -383,7 +382,7 @@ TEST(Sgemm_128x128x16, sgemm_120x112x64_ldg4_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x128x16_alpha2_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 16, 2.f, 0.f); @@ -392,7 +391,7 @@ TEST(Sgemm_128x128x16, sgemm_128x128x16_alpha2_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x16_beta1_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 112, 16, 1.f, 1.f); @@ -401,7 +400,7 @@ TEST(Sgemm_128x128x16, sgemm_128x112x16_beta1_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x128x16, sgemm_128x112x16_alpha2_beta1_nt) { - typedef 
cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 112, 16, 2.f, 1.f); diff --git a/tools/test/unit/gemm/sgemm_128x128x8.cu b/tools/test/unit/gemm/sgemm_128x128x8.cu index 51f91217b..a9931f340 100644 --- a/tools/test/unit/gemm/sgemm_128x128x8.cu +++ b/tools/test/unit/gemm/sgemm_128x128x8.cu @@ -22,11 +22,20 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + + //////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_128x128x8, sgemm_1024x512x8_nt) { + typedef cutlass::gemm::SgemmTraits > + SgemmTraits; + run_gemm(1024, 512, 8); +} //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_128x32x16.cu b/tools/test/unit/gemm/sgemm_128x32x16.cu index 6b5d80210..2886eef5c 100644 --- a/tools/test/unit/gemm/sgemm_128x32x16.cu +++ b/tools/test/unit/gemm/sgemm_128x32x16.cu @@ -22,16 +22,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x1_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 32, 1); @@ -40,7 +40,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x1_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 32, 16); @@ -49,7 +49,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x17_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 32, 17); @@ -58,7 +58,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x17_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x32_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 32, 32); @@ -67,7 +67,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x32_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_256x32x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 32, 16); @@ -76,7 +76,7 @@ TEST(Sgemm_128x32x16, sgemm_256x32x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x64x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 16); @@ -85,7 +85,7 @@ 
TEST(Sgemm_128x32x16, sgemm_128x64x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_256x64x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 64, 16); @@ -94,7 +94,7 @@ TEST(Sgemm_128x32x16, sgemm_256x64x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x1_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -104,7 +104,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x1_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -114,7 +114,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x17_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -124,7 +124,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x17_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x32_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -134,7 +134,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x32_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_256x32x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -144,7 +144,7 @@ TEST(Sgemm_128x32x16, sgemm_256x32x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x64x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -154,7 +154,7 @@ TEST(Sgemm_128x32x16, sgemm_128x64x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// 
TEST(Sgemm_128x32x16, sgemm_256x64x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -164,7 +164,7 @@ TEST(Sgemm_128x32x16, sgemm_256x64x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x1_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 1); } @@ -172,7 +172,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x1_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -182,7 +182,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x17_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -192,7 +192,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x17_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x32_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -202,7 +202,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x32_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_256x32x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -212,7 +212,7 @@ TEST(Sgemm_128x32x16, sgemm_256x32x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x64x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -222,7 +222,7 @@ TEST(Sgemm_128x32x16, sgemm_128x64x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_256x64x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -232,7 +232,7 @@ TEST(Sgemm_128x32x16, 
sgemm_256x64x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x1_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 1); } @@ -240,7 +240,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x1_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 32, 16); @@ -249,7 +249,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x17_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 32, 17); @@ -258,7 +258,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x17_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x32x32_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 32, 32); @@ -267,7 +267,7 @@ TEST(Sgemm_128x32x16, sgemm_128x32x32_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_256x32x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 32, 16); @@ -276,7 +276,7 @@ TEST(Sgemm_128x32x16, sgemm_256x32x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_128x64x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 16); @@ -285,7 +285,7 @@ TEST(Sgemm_128x32x16, sgemm_128x64x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x32x16, sgemm_256x64x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 64, 16); diff --git a/tools/test/unit/gemm/sgemm_128x32x8.cu 
b/tools/test/unit/gemm/sgemm_128x32x8.cu index 9f4c07154..5e7a9f75b 100644 --- a/tools/test/unit/gemm/sgemm_128x32x8.cu +++ b/tools/test/unit/gemm/sgemm_128x32x8.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_128x64x16.cu b/tools/test/unit/gemm/sgemm_128x64x16.cu index d49f7b19a..5852a6b17 100644 --- a/tools/test/unit/gemm/sgemm_128x64x16.cu +++ b/tools/test/unit/gemm/sgemm_128x64x16.cu @@ -22,16 +22,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x1_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 1); @@ -40,7 +40,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x1_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 16); @@ -49,7 +49,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x17_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 17); @@ -58,7 +58,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x17_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x64_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 64); @@ -67,7 +67,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x64_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_256x64x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 64, 16); @@ -76,7 +76,7 @@ TEST(Sgemm_128x64x16, sgemm_256x64x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x128x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 16); @@ -85,7 +85,7 @@ 
TEST(Sgemm_128x64x16, sgemm_128x128x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_256x128x16_nt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 128, 16); @@ -94,7 +94,7 @@ TEST(Sgemm_128x64x16, sgemm_256x128x16_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x1_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -104,7 +104,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x1_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x8_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -114,7 +114,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x8_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x17_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -124,7 +124,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x17_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x64_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -134,7 +134,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x64_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_256x64x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -144,7 +144,7 @@ TEST(Sgemm_128x64x16, sgemm_256x64x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x128x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -154,7 +154,7 @@ TEST(Sgemm_128x64x16, sgemm_128x128x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// 
TEST(Sgemm_128x64x16, sgemm_256x128x16_nn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -164,7 +164,7 @@ TEST(Sgemm_128x64x16, sgemm_256x128x16_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x1_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 1); } @@ -172,7 +172,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x1_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -182,7 +182,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x17_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -192,7 +192,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x17_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x64_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -202,7 +202,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x64_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_256x64x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -212,7 +212,7 @@ TEST(Sgemm_128x64x16, sgemm_256x64x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x128x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -222,7 +222,7 @@ TEST(Sgemm_128x64x16, sgemm_128x128x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_256x128x16_tn) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; @@ -232,7 +232,7 @@ TEST(Sgemm_128x64x16, 
sgemm_256x128x16_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x1_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 1); } @@ -240,7 +240,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x1_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 16); @@ -249,7 +249,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x17_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 17); @@ -258,7 +258,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x17_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x64x64_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 64, 64); @@ -267,7 +267,7 @@ TEST(Sgemm_128x64x16, sgemm_128x64x64_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_128x128x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(128, 128, 16); @@ -276,7 +276,7 @@ TEST(Sgemm_128x64x16, sgemm_128x128x16_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_128x64x16, sgemm_256x128x16_tt) { - typedef cutlass::gemm::SgemmTraits > SgemmTraits; run_gemm(256, 128, 16); diff --git a/tools/test/unit/gemm/sgemm_128x64x8.cu b/tools/test/unit/gemm/sgemm_128x64x8.cu index fc8185dbb..e07c38db3 100644 --- a/tools/test/unit/gemm/sgemm_128x64x8.cu +++ b/tools/test/unit/gemm/sgemm_128x64x8.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -348,7 +348,7 @@ TEST(Sgemm_128x64x8, sgemm_128x64x64_4x8_accumulators_nt) { cutlass::MatrixLayout::kRowMajor, cutlass::Shape<8, 64, 128>, cutlass::gemm::LinearScaling, - cutlass::Shape<8, 8, 4> > + cutlass::Shape<8, 8, 8> > SgemmTraits; run_gemm(128, 64, 64); } diff --git a/tools/test/unit/gemm/sgemm_64x128x16.cu b/tools/test/unit/gemm/sgemm_64x128x16.cu index 5fdeb1f6f..c4afa3414 100644 --- a/tools/test/unit/gemm/sgemm_64x128x16.cu +++ b/tools/test/unit/gemm/sgemm_64x128x16.cu @@ -22,16 +22,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Sgemm_64x128x16, sgemm_64x128x64_4x8_accumulators_nt) { - typedef cutlass::gemm::SgemmTraits, cutlass::gemm::LinearScaling, diff --git a/tools/test/unit/gemm/sgemm_64x128x8.cu b/tools/test/unit/gemm/sgemm_64x128x8.cu index 6d3448e0d..e87abd2fb 100644 --- a/tools/test/unit/gemm/sgemm_64x128x8.cu +++ b/tools/test/unit/gemm/sgemm_64x128x8.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_64x32x16.cu b/tools/test/unit/gemm/sgemm_64x32x16.cu index e0f7841a2..0cb0f2b76 100644 --- a/tools/test/unit/gemm/sgemm_64x32x16.cu +++ b/tools/test/unit/gemm/sgemm_64x32x16.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_64x32x8.cu b/tools/test/unit/gemm/sgemm_64x32x8.cu index e1ec1aebf..3e8c60aaf 100644 --- a/tools/test/unit/gemm/sgemm_64x32x8.cu +++ b/tools/test/unit/gemm/sgemm_64x32x8.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_64x64x16.cu b/tools/test/unit/gemm/sgemm_64x64x16.cu index 3dd79e607..45619cef8 100644 --- a/tools/test/unit/gemm/sgemm_64x64x16.cu +++ b/tools/test/unit/gemm/sgemm_64x64x16.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_64x64x8.cu b/tools/test/unit/gemm/sgemm_64x64x8.cu index 5c373974f..7b02c46db 100644 --- a/tools/test/unit/gemm/sgemm_64x64x8.cu +++ b/tools/test/unit/gemm/sgemm_64x64x8.cu @@ -22,11 +22,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/gemm/sgemm_threadblock_swizzle_nn.cu b/tools/test/unit/gemm/sgemm_threadblock_swizzle_nn.cu new file mode 100644 index 000000000..fab590660 --- /dev/null +++ b/tools/test/unit/gemm/sgemm_threadblock_swizzle_nn.cu @@ -0,0 +1,1481 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + +#pragma warning( disable : 4503) + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + 
ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef 
cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 
1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + 
index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + 
typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle Boustrophedon +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + 
cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename 
cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling 
+ EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol2_Boustrophedon) { 
+ typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename 
Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + 
typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_nn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef 
cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef 
cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + 
run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + 
ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 
1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, 
cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_nn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef 
cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + 
typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + 
typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef 
cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nn_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + + diff --git a/tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu b/tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu new file mode 100644 index 000000000..c436cdf53 --- /dev/null +++ b/tools/test/unit/gemm/sgemm_threadblock_swizzle_nt.cu @@ -0,0 +1,1481 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + +#pragma warning( disable : 4503) + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef 
cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + 
thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef 
cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // 
+ run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle Boustrophedon +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef 
cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 
8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename 
cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef 
cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, 
sgemm_64x32x16_nt_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename 
Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_nt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef 
cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef 
cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + 
typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + 
SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef 
cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_nt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_nt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + 
cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_nt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename 
cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_nt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef 
cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, 
sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename 
Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_nt_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + 
GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + + diff --git a/tools/test/unit/gemm/sgemm_threadblock_swizzle_tn.cu b/tools/test/unit/gemm/sgemm_threadblock_swizzle_tn.cu new file mode 100644 index 000000000..b8b9f7fdc --- /dev/null +++ b/tools/test/unit/gemm/sgemm_threadblock_swizzle_tn.cu @@ -0,0 +1,1481 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + +#pragma warning( disable : 4503) + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef 
cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tn_RowMajorSwizzle) { + typedef int index; + typedef 
cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename 
cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef 
cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, 
sgemm_64x32x16_tn_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename 
Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle Boustrophedon +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef 
cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef 
cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 
1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, 
cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + 
EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_tn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tn_ColumnMajorSwizzle) { + typedef int index; + typedef 
cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename 
cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef 
cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, 
sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename 
Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle Boustrophedon + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef 
cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_tn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + 
typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 
1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, 
cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + 
EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tn_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + + diff --git a/tools/test/unit/gemm/sgemm_threadblock_swizzle_tt.cu 
b/tools/test/unit/gemm/sgemm_threadblock_swizzle_tt.cu new file mode 100644 index 000000000..e1ceae68f --- /dev/null +++ b/tools/test/unit/gemm/sgemm_threadblock_swizzle_tt.cu @@ -0,0 +1,1481 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/sgemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" + +#pragma warning( disable : 4503) + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef 
cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + 
thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef 
cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // 
+ run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Row Major Swizzle Boustrophedon +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef 
cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 
8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename 
cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef 
cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, 
sgemm_64x32x16_tt_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_RowMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename 
Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::RowMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_tt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef 
cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef 
cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + 
typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol2) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + 
SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol3) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef 
cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::OneDirection>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//Column Major Swizzle + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x128x16_tt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x128x16_tt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + 
cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_512x256x16_swizzle, sgemm_128x64x16_tt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename 
cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(512, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_516x260x16_swizzle, sgemm_128x64x16_tt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(516, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef 
cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<1, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, 
sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol2_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename 
Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<2, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1030, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1024x256x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 256, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Sgemm_1030x260x16_swizzle, sgemm_64x32x16_tt_ColumnMajorSwizzle_groupCol3_Boustrophedon) { + typedef int index; + typedef cutlass::gemm::SgemmConfig/*OutputTile*/, + cutlass::Shape<8, 8, 8>/*ThreadGemmShape*/, + 1/*kScalarsPerLdgA*/, + 1/*kScalarsPerLdgB*/> + thisGemmConfig; + typedef cutlass::gemm::GemmTileTraitsHelperA + 
GemmTileTraitsHelperA; + typedef cutlass::gemm::GemmTileTraitsHelperB + GemmTileTraitsHelperB; + typedef cutlass::gemm::SimplifiedGemmTraitsHelper + Helper; + typedef cutlass::gemm::LinearScaling + EpilogueFunctor; + typedef cutlass::gemm::SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + typedef cutlass::gemm::ClearAccumulators + ClearAccumulators; + + typedef cutlass::gemm::GemmTraits< + thisGemmConfig, + typename Helper::GlobalLoadStreamA, + typename Helper::GlobalLoadStreamB, + typename Helper::SharedLoadStreamA, + typename Helper::SharedLoadStreamB, + typename cutlass::gemm::GemmEpilogue, + typename cutlass::gemm::ColumnMajorBlockSwizzle<3, cutlass::gemm::swizzleDirection::Boustrophedon>, + index, + ClearAccumulators + > + SgemmTraits; + // + run_gemm(1024, 260, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + + diff --git a/tools/test/unit/gemm/warp_multiply_add_nvcuda.cu b/tools/test/unit/gemm/warp_multiply_add_nvcuda.cu new file mode 100644 index 000000000..7214e372e --- /dev/null +++ b/tools/test/unit/gemm/warp_multiply_add_nvcuda.cu @@ -0,0 +1,276 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Guard conditions around the entire file.
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "cutlass_unit_tests.h" +#include "tools/util/half.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "cutlass/gemm/warp_multiply_add_nvcuda.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Tests for Warp-level Multiply Add operation using NvCuda API +// + +namespace test { + +/// +template +__global__ void warp_multiply_add( + typename WarpMultiplyAdd::ScalarA const *A, + int lda, + typename WarpMultiplyAdd::ScalarB const *B, + int ldb, + typename WarpMultiplyAdd::ScalarC *C, int ldc) { + + typename WarpMultiplyAdd::LoadIteratorA iteratorA(A, lda); + typename WarpMultiplyAdd::LoadIteratorB iteratorB(B, ldb); + typename WarpMultiplyAdd::StoreIteratorC iteratorC(C, ldc); + + typename WarpMultiplyAdd::FragmentA fragmentA; + typename WarpMultiplyAdd::FragmentB fragmentB; + typename WarpMultiplyAdd::FragmentC fragmentC; + + iteratorA.load(fragmentA); + iteratorB.load(fragmentB); + + fragmentC.clear(); + + WarpMultiplyAdd::multiply_add(fragmentC, fragmentA, fragmentB, fragmentC); + + iteratorC.store(fragmentC); +} + +/// Test environment for Warp Multiply Add operation +template < + cutlass::MatrixLayout::Kind LayoutA, + cutlass::MatrixLayout::Kind LayoutB, + cutlass::MatrixLayout::Kind LayoutC, + typename ScalarC, + typename WarpTile, + typename WmmaTile +> +struct TestWarpMultiplyAdd { + + typedef cutlass::gemm::WarpMultiplyAddNvcuda< + LayoutA, + LayoutB, + LayoutC, + half, + half, + ScalarC, + WarpTile, + cutlass::Shape<1, 1, 1, 1>, + WmmaTile + > WarpMultiplyAdd; + + /// Testbed type + typedef test::GemmTestbed< + cutlass::half_t, + cutlass::half_t, + ScalarC, + ScalarC, + ScalarC + > GemmTestbed; + + // + // Data members + // + + GemmTestbed testbed; + + // + // Methods + // + + TestWarpMultiplyAdd(): testbed( + 
WarpTile::kW, // M + WarpTile::kH, // N + WarpTile::kD, // K + cutlass::convert(LayoutA), + cutlass::convert(LayoutB), + 1, + 0, + CUBLAS_GEMM_DEFAULT_TENSOR_OP, + cutlass::convert(LayoutC)) + { + + } + + /// Run + void run() { + testbed.initialize(); + + // launch + warp_multiply_add<<< + dim3(1,1,1), dim3(32, 1, 1) + >>>( + testbed.ptr_A(), + testbed.lda(), + testbed.ptr_B(), + testbed.ldb(), + testbed.ptr_computed(), + testbed.ldc() + ); + + // verify + ASSERT_TRUE(testbed.verify_with_host()); + } +}; + +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ScalarC, + typename WarpTile, + typename WmmaTile +> +struct TestWarpMultiplyAddForAllLayouts { + + void run() { + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kRowMajor, + 
cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + + test::TestWarpMultiplyAdd< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + ScalarC, + WarpTile, + WmmaTile + >().run(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// 16x16x16 WMMA Tile Shape with F32 accumulation +// + +TEST(WmmaGemm, WarpMultiplyAdd_f32_16x16x16_16x16x16) { + TestWarpMultiplyAddForAllLayouts< + float, + cutlass::Shape<16, 16, 16>, + cutlass::Shape<16, 16, 16> + >().run(); +} + +TEST(WmmaGemm, WarpMultiplyAdd_f32_16x16x32_16x16x16) { + TestWarpMultiplyAddForAllLayouts< + float, + cutlass::Shape<16, 16, 32>, + cutlass::Shape<16, 16, 16> + >().run(); +} + +TEST(WmmaGemm, WarpMultiplyAdd_f32_16x32x32_16x16x16) { + TestWarpMultiplyAddForAllLayouts< + float, + cutlass::Shape<16, 32, 32>, + cutlass::Shape<16, 16, 16> + >().run(); +} + +TEST(WmmaGemm, WarpMultiplyAdd_f32_16x32x64_16x16x16) { + TestWarpMultiplyAddForAllLayouts< + float, + cutlass::Shape<16, 32, 64>, + cutlass::Shape<16, 16, 16> + >().run(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif diff --git a/tools/test/unit/gemm/wmma_binary_gemm.cu b/tools/test/unit/gemm/wmma_binary_gemm.cu new file mode 100644 index 000000000..b545179d5 --- /dev/null +++ b/tools/test/unit/gemm/wmma_binary_gemm.cu @@ -0,0 +1,236 @@ +/*************************************************************************************************** + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#include "cutlass/wmma_matrix.h" +#ifdef CUTLASS_USE_SUBBYTE_WMMA + +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/wmma_gemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/binary_gemm.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_32x32x256, wmma_binary_gemm_32x32x256) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + cutlass::Shape<128, 8, 8>, + 32, + 32> + WmmaGemmTraits; + run_binary_gemm(32, 32, 256); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_32x32x512, wmma_binary_gemm_32x32x512) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<512, 32, 32>, + cutlass::Shape<128, 8, 8>, + 32, + 32> + WmmaGemmTraits; + run_binary_gemm(32, 32, 512); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_64x64x256, wmma_binary_gemm_64x64x256) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + cutlass::Shape<128, 8, 8>, + 32, + 32> + WmmaGemmTraits; + run_binary_gemm(64, 64, 256); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_64x32x256, wmma_binary_gemm_64x32x256) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + cutlass::Shape<128, 8, 8>, + 32, 
+ 32> + WmmaGemmTraits; + run_binary_gemm(64, 32, 256); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_32x64x256, wmma_binary_gemm_32x64x256) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + cutlass::Shape<128, 8, 8>, + 32, + 32> + WmmaGemmTraits; + run_binary_gemm(32, 64, 256); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_128x128x256, wmma_binary_gemm_128x128x256) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 64, 64>, + cutlass::Shape<128, 8, 8>, + 128, + 128> + WmmaGemmTraits; + run_binary_gemm(128, 128, 256); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_128x128x256, wmma_binary_gemm_512x512x256) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 64, 64>, + cutlass::Shape<128, 8, 8>, + 32, + 32> + WmmaGemmTraits; + run_binary_gemm(512, 512, 256); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_32x32x256, wmma_binary_gemm_32x32x512) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + cutlass::Shape<128, 8, 8>, + 32, + 32> + WmmaGemmTraits; + run_binary_gemm(32, 32, 512); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_32x32x1024, wmma_binary_gemm_128x128x1024) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + 
cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<1024, 32, 32>, + cutlass::Shape<128, 8, 8>, + 128, + 128> + WmmaGemmTraits; + run_binary_gemm(128, 128, 1024); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaBinaryGemm_64x32x1024, wmma_binary_gemm_128x128x1024) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<1024, 32, 64>, + cutlass::Shape<128, 8, 8>, + 128, + 128> + WmmaGemmTraits; + run_binary_gemm(128, 128, 1024); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif // ifdef CUTLASS_USE_SUBBYTE_WMMA diff --git a/tools/test/unit/gemm/wmma_gemm.cu b/tools/test/unit/gemm/wmma_gemm.cu index 6db07afce..07cb9bde2 100644 --- a/tools/test/unit/gemm/wmma_gemm.cu +++ b/tools/test/unit/gemm/wmma_gemm.cu @@ -22,20 +22,81 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************************************/ -#include +#include "cutlass/wmma_matrix.h" #if defined(CUTLASS_USE_WMMA_API) -#include -#include -#include -#include -#include +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/wmma_gemm_traits.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/run_gemm.h" +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FP16 accumulation +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm_16x16x32_f16, wmma_gemm_16x16x16_nn) { + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::Shape<32, 16, 16>, + half, + half, + half, + cutlass::gemm::LinearScaling, + half + > + WmmaGemmTraits; + + run_gemm(16, 16, 16); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm_16x16x32_f16, wmma_gemm_16x16x32_nn) { + + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::Shape<32, 16, 16>, + half, + half, + half, + cutlass::gemm::LinearScaling, + half + > + WmmaGemmTraits; + run_gemm(16, 16, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm_128x128x32_f16, wmma_16x16x16_gemm_256x256x128_nn) { + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + cutlass::Shape<32, 128, 128>, + half, + half, + half, + cutlass::gemm::LinearScaling, + half + > + WmmaGemmTraits; + run_gemm(256, 256, 128); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// FP32 accumulation +// 
//////////////////////////////////////////////////////////////////////////////////////////////////// TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_nt) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 16); @@ -45,7 +106,7 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_nt) { TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_nt) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 32); @@ -53,9 +114,9 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_nt) { +TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_nt) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(256, 256, 128); @@ -64,10 +125,12 @@ TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_nt) { +TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_nt) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, @@ -82,10 +145,12 @@ TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_nt) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_nt) { +TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_nt) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, @@ -100,7 +165,7 @@ TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_nt) { TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_nn) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 16); @@ -110,7 +175,7 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_nn) { 
TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_nn) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 32); @@ -118,9 +183,9 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_nn) { +TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_nn) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(256, 256, 128); @@ -129,10 +194,12 @@ TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_nn) { +TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_nn) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, @@ -147,10 +214,12 @@ TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_nn) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_nn) { +TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_nn) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, @@ -165,7 +234,7 @@ TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_nn) { TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_tt) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 16); @@ -175,7 +244,7 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_tt) { TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_tt) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 32); @@ -183,9 +252,9 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_tt) { 
//////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_tt) { +TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_tt) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(256, 256, 128); @@ -194,10 +263,12 @@ TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_tt) { +TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_tt) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, @@ -212,10 +283,12 @@ TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_tt) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_tt) { +TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_tt) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, @@ -230,7 +303,7 @@ TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_tt) { TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_tn) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 16); @@ -240,7 +313,7 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x16_tn) { TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_tn) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; run_gemm(16, 16, 32); @@ -248,9 +321,9 @@ TEST(WmmaGemm_16x16x32, wmma_gemm_16x16x32_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// -TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_tn) { +TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_tn) { typedef cutlass::gemm::WmmaGemmTraits > WmmaGemmTraits; 
run_gemm(256, 256, 128); @@ -259,10 +332,12 @@ TEST(WmmaGemm_128x128x32, wmma_16x16x16_gemm_256x256x128_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_tn) { +TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_tn) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, @@ -277,10 +352,12 @@ TEST(WmmaGemm_128x128x32, wmma_8x32x16_gemm_256x256x128_tn) { //////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(CUDA_VERSION) && CUDA_VERSION >= 9100 -TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_tn) { +TEST(WmmaGemm_128x128x32, wmma_32x8x16_gemm_256x256x128_tn) { typedef cutlass::gemm::WmmaGemmTraits, + half, + half, float, cutlass::gemm::LinearScaling, float, diff --git a/tools/test/unit/gemm/wmma_gemm_epilogue.cu b/tools/test/unit/gemm/wmma_gemm_epilogue.cu new file mode 100644 index 000000000..b7fab2ae9 --- /dev/null +++ b/tools/test/unit/gemm/wmma_gemm_epilogue.cu @@ -0,0 +1,446 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Guard conditions around the entire file. 
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "cutlass_unit_tests.h" +#include "tools/util/half.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "cutlass/gemm/wmma_gemm_traits.h" +#include "cutlass/gemm/wmma_gemm_epilogue.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +__global__ void test_epilogue_kernel( + typename EpilogueTraits::Params params, + cutlass::Coord<3> problem, + typename EpilogueTraits::AccumulatorScalar *accum_ptr, + int ldm) { + + // Shared memory allocation + __shared__ typename EpilogueTraits::SharedStorage shared_storage; + + // + // Load accumulators from memory - normally, a GEMM would compute these + // + + // Traits class defines tiling + GemmTraits traits; + + int warp_id = (threadIdx.x / 32); + cutlass::Coord<3> warp_offset = traits(warp_id); + + // Accumulator fragment + typename EpilogueTraits::AccumulatorFragment accumulator; + + // Construct an out-of-band LoadIterator for accumulators to initialize them + + LoadAccumulatorIterator load_accum_iterator(accum_ptr, ldm, warp_offset); + load_accum_iterator.load(accumulator); + + __syncthreads(); + + // + // Test the epilogue itself + // + + typedef cutlass::gemm::WmmaGemmEpilogue Epilogue; + + Epilogue epilogue(params, problem, warp_offset); + + // Perform the epilogue operation + epilogue.update(shared_storage, accumulator); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ThreadBlockTile, + typename WarpTile, + typename WmmaTile, + typename EpilogueTile, + typename StreamTile, + typename AccumulatorType, + typename ScalarC +> +struct TestWmmaGemmEpilogue { + + typedef cutlass::gemm::WmmaGemmTraits< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + ThreadBlockTile, + WarpTile, 
+ WmmaTile, + AccumulatorType, + AccumulatorType, + 1, + AccumulatorType, + EpilogueTile, + StreamTile + > Traits; + + // Construct an actual epilogue + typedef cutlass::gemm::EpilogueLinearScaling EpilogueLinearScaling; + + /// Define some traits + typedef cutlass::gemm::WmmaGemmEpilogueTraitsBasic< + ScalarC, + typename Traits::WarpMultiplyAdd::StoreIteratorC, + ScalarC, + ThreadBlockTile, + 32 * Traits::Warps::kCount, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + EpilogueLinearScaling + > WmmaGemmEpilogueTraits; + + /// Type alias for EpilogueTraits type + typedef typename WmmaGemmEpilogueTraits::Traits EpilogueTraits; + + TestWmmaGemmEpilogue() { + + } + + void run(cutlass::Coord<3> problem) { + // + // Prepare accumulator tile + // + cutlass::HostTensor accumulator_matrix; + cutlass::HostTensor source_matrix; + cutlass::HostTensor destination_matrix; + + accumulator_matrix.resize_matrix( + ThreadBlockTile::kW, + ThreadBlockTile::kH, + cutlass::MatrixLayout::kColumnMajor); + + source_matrix.resize_matrix( + problem[2], + problem[1], + cutlass::MatrixLayout::kColumnMajor); + + destination_matrix.resize_matrix( + problem[2], + problem[1], + cutlass::MatrixLayout::kColumnMajor); + + accumulator_matrix.fill_sequential(); + + source_matrix.fill_sequential(); + + int value = 0; + for (int row = 0; row < ThreadBlockTile::kW; ++row) { + for (int col = 0; col < ThreadBlockTile::kH; ++col, ++value) { + if (row < problem[2] && col < problem[1]) { + source_matrix.at(cutlass::make_Coord(0, row, col, 0)) = ScalarC(value); + } + } + } + + destination_matrix.fill(0); + + // + // Launch test kernel + // + dim3 grid(1,1); + dim3 block(32 * Traits::Warps::kCount, 1, 1); + + EpilogueLinearScaling functor; + functor.initialize(1, 0); + + typename EpilogueTraits::Params params; + + params.initialize( + functor, + source_matrix.device_data(), + source_matrix.leading_dim(), + destination_matrix.device_data(), + destination_matrix.leading_dim() + ); + + test_epilogue_kernel< 
+ Traits, + EpilogueTraits, + typename Traits::WarpMultiplyAdd::LoadIteratorC + ><<< grid, block >>>( + params, + problem, + accumulator_matrix.device_data(), + accumulator_matrix.leading_dim() + ); + + destination_matrix.sync_host(); + + EXPECT_TRUE(accumulator_matrix.bit_equals(destination_matrix)) + << "Accumulators:\n" << accumulator_matrix << "\nDestination:\n" << destination_matrix; + } + + void run() { + run(cutlass::make_Coord(ThreadBlockTile::kD, ThreadBlockTile::kH, ThreadBlockTile::kW)); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Small epilogue +TEST(WmmaGemm_16x16x16, wmma_epilogue_basic) { + + // GEMM threadblock structure + typedef cutlass::Shape<16, 16, 16> ThreadBlockTile; + typedef cutlass::Shape<16, 16, 16> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 16, 16> EpilogueTile; + typedef cutlass::Shape<1, 16, 16> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC + >().run(); +} + +TEST(WmmaGemm_16x16x16, wmma_epilogue_ragged) { + + // GEMM threadblock structure + typedef cutlass::Shape<16, 16, 16> ThreadBlockTile; + typedef cutlass::Shape<16, 16, 16> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 16, 16> EpilogueTile; + typedef cutlass::Shape<1, 16, 16> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC + >().run(cutlass::make_Coord(0, 15, 15)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Small epilogue +TEST(WmmaGemm_32x32x16, wmma_epilogue_basic_32x32_32x32) { + + // GEMM threadblock 
structure + typedef cutlass::Shape<16, 32, 32> ThreadBlockTile; + typedef cutlass::Shape<16, 32, 32> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 32, 32> EpilogueTile; + typedef cutlass::Shape<1, 4, 32> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC + >().run(); +} + +/// Small epilogue +TEST(WmmaGemm_32x32x16, wmma_epilogue_basic_32x32_32x32_ragged) { + + // GEMM threadblock structure + typedef cutlass::Shape<16, 32, 32> ThreadBlockTile; + typedef cutlass::Shape<16, 32, 32> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 32, 32> EpilogueTile; + typedef cutlass::Shape<1, 4, 32> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC + >().run(cutlass::make_Coord(0, 14, 17)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Small epilogue +TEST(WmmaGemm_32x32x16, wmma_epilogue_basic_32x32_16x16) { + + // GEMM threadblock structure + typedef cutlass::Shape<16, 32, 32> ThreadBlockTile; + typedef cutlass::Shape<16, 16, 16> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 32, 32> EpilogueTile; + typedef cutlass::Shape<1, 4, 32> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC + >().run(); +} + +/// Small epilogue +TEST(WmmaGemm_32x32x16, wmma_epilogue_basic_32x32_16x16_ragged) { + + // GEMM threadblock structure + typedef cutlass::Shape<16, 32, 32> ThreadBlockTile; + typedef 
cutlass::Shape<16, 16, 16> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 32, 32> EpilogueTile; + typedef cutlass::Shape<1, 4, 32> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC + >().run(cutlass::make_Coord(0, 23, 19)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Large epilogue +TEST(WmmaGemm_128x128x16, wmma_epilogue_basic_32x32_16x16) { + + // GEMM threadblock structure + typedef cutlass::Shape<16, 128, 128> ThreadBlockTile; + typedef cutlass::Shape<16, 32, 64> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 64, 64> EpilogueTile; + typedef cutlass::Shape<1, 4, 64> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + typedef cutlass::gemm::WmmaGemmEpilogueStructure< + ThreadBlockTile, + EpilogueTile, + StreamTile, + WarpTile, + WmmaTile + > Structure; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC + >().run(); +} + +/// Large epilogue +TEST(WmmaGemm_128x128x16, wmma_epilogue_basic_32x32_16x16_ragged) { + + // GEMM threadblock structure + typedef cutlass::Shape<16, 128, 128> ThreadBlockTile; + typedef cutlass::Shape<16, 32, 64> WarpTile; + typedef cutlass::Shape<16, 16, 16> WmmaTile; + + // Epilogue shapes + typedef cutlass::Shape<1, 64, 64> EpilogueTile; + typedef cutlass::Shape<1, 4, 64> StreamTile; + + typedef float AccumulatorType; + typedef float ScalarC; + + typedef cutlass::gemm::WmmaGemmEpilogueStructure< + ThreadBlockTile, + EpilogueTile, + StreamTile, + WarpTile, + WmmaTile + > Structure; + + TestWmmaGemmEpilogue< + ThreadBlockTile, + WarpTile, + WmmaTile, + EpilogueTile, + StreamTile, + AccumulatorType, + ScalarC 
+ >().run(cutlass::make_Coord(0, 119, 101)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif // end guard conditional on SM70 diff --git a/tools/test/unit/gemm/wmma_gemm_fragment_stream.cu b/tools/test/unit/gemm/wmma_gemm_fragment_stream.cu new file mode 100644 index 000000000..446dedcae --- /dev/null +++ b/tools/test/unit/gemm/wmma_gemm_fragment_stream.cu @@ -0,0 +1,504 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Guard conditions around the entire file. +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "cutlass_unit_tests.h" + +#include "tools/util/half.h" +#include "tools/util/tensor_view_io.h" +#include "tools/util/host_tensor.h" + +#include "tools/test/unit/gemm/gemm_testbed.h" + +#include "cutlass/gemm/gemm_fragment_stream.h" +#include "cutlass/gemm/warp_multiply_add_nvcuda.h" + + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace test { + +template +__global__ void fragment_stream(typename FragmentStream::Params params, half *output) { + + __shared__ typename FragmentStream::Storage storage; + + params.store_params.initialize(storage); + FragmentStream stream( + params, + cutlass::make_Coord(16, 256, 256) + ); + + // load + stream.load(); + + // store + stream.commit(); + + __syncthreads(); + + // one thread writes it all out + if (threadIdx.x == 0) { + + half const *ptr = reinterpret_cast(storage.data()); + + CUTLASS_PRAGMA_NO_UNROLL + for (int i = 0; i < FragmentStream::Storage::Shape::kCount; ++i) { + output[i] = ptr[i]; + } + } +} + +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +struct TestGemmDesc { + int m, n, k; + inline __host__ __device__ TestGemmDesc() : m(0), n(0), k(0) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ThreadBlockTile, + cutlass::MatrixLayout::Kind LayoutA, + cutlass::MatrixLayout::Kind LayoutB, + int Threads, + int ScalarsPerAccess +> +struct TestGemmFragmentStream { + + /// TileStream for Operand A + typedef cutlass::gemm::GemmFragmentStreamTraits< + cutlass::gemm::GemmOperand::kA, + uint16_t, + LayoutA, + ThreadBlockTile, + Threads, + ScalarsPerAccess + > FragmentStreamTraitsA; + + /// Defines fragment stream for A operand + typedef typename cutlass::gemm::GemmFragmentStream FragmentStreamA; + + /// TileStream for Operand B + typedef typename cutlass::gemm::GemmFragmentStreamTraits< + cutlass::gemm::GemmOperand::kB, + uint16_t, + LayoutB, + ThreadBlockTile, + Threads, + ScalarsPerAccess + > FragmentStreamTraitsB; + + /// Defines fragment stream for B operand + typedef typename cutlass::gemm::GemmFragmentStream FragmentStreamB; + + // + // Data members + // + + cutlass::HostTensor tensor_A_in; + cutlass::HostTensor tensor_A_out; + + cutlass::HostTensor tensor_B_in; + cutlass::HostTensor tensor_B_out; + + // + // Methods + // + + /// Constructor + TestGemmFragmentStream() { + tensor_A_in.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, LayoutA); + tensor_A_out.resize_matrix(ThreadBlockTile::kW, ThreadBlockTile::kD, LayoutA); + + tensor_B_in.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, LayoutB); + tensor_B_out.resize_matrix(ThreadBlockTile::kD, ThreadBlockTile::kH, LayoutB); + } + + /// Writes details about TileStream + template + std::ostream & write(std::ostream &out, typename TileStream::Params const &params) { + + out << "TileStream::LoadIterator\n" + << " Tile(" << TileStream::LoadIterator::Tile::kH << ", " + 
<< TileStream::LoadIterator::Tile::kW << ")\n" + << " Delta(" << TileStream::LoadIterator::Steps::kH << ", " + << TileStream::LoadIterator::Steps::kW << ")\n" + << " Iterations(" << TileStream::LoadIterator::Iterations::kH << ", " + << TileStream::LoadIterator::Iterations::kW << ")\n"; + + out + << " stride_h: " << params.load_params.stride_h << "\n" + << " stride_w: " << params.load_params.stride_w << "\n" + << " inc_d: " << params.load_params.inc_d << "\n" + << " inc_h: " << params.load_params.inc_h << "\n" + << " inc_w: " << params.load_params.inc_w << std::endl; + + out << "output elements: " << TileStream::Storage::Shape::kCount << std::endl; + + return out; + } + + /// Runs test + void run() { + + tensor_A_in.fill_linear( + LayoutA == cutlass::MatrixLayout::kColumnMajor ? + cutlass::make_Coord(1, 1, ThreadBlockTile::kW, 1) : + cutlass::make_Coord(1, ThreadBlockTile::kD, 1, 1)); + + tensor_A_out.fill(0); + + tensor_A_in.sync_device(); + tensor_A_out.sync_device(); + + tensor_B_in.fill_linear( + LayoutB == cutlass::MatrixLayout::kColumnMajor ? 
+ cutlass::make_Coord(1, 1, ThreadBlockTile::kD, 1) : + cutlass::make_Coord(1, ThreadBlockTile::kH, 1, 1)); + + tensor_B_out.fill(0); + + tensor_B_in.sync_device(); + tensor_B_out.sync_device(); + + + typename FragmentStreamA::Params params_A; + typename FragmentStreamB::Params params_B; + + TestGemmDesc desc; + params_A.initialize( + desc, + reinterpret_cast(tensor_A_in.device_ref().data()), + tensor_A_in.leading_dim() + ); + + params_B.initialize( + desc, + reinterpret_cast(tensor_B_in.device_ref().data()), + tensor_B_in.leading_dim() + ); + + test::fragment_stream<<< dim3(1,1,1), dim3(Threads,1,1) >>>( + params_A, + tensor_A_out.device_data() + ); + + test::fragment_stream<<< dim3(1,1,1), dim3(Threads,1,1) >>>( + params_B, + tensor_B_out.device_data() + ); + + tensor_A_out.sync_host(); + tensor_B_out.sync_host(); + + bool passed_A = tensor_A_in.bit_equals(tensor_A_out); + bool passed_B = tensor_B_in.bit_equals(tensor_B_out); + + EXPECT_TRUE(passed_A) << tensor_A_out; + if (!passed_A) { + this->template write(std::cout, params_A); + } + + EXPECT_TRUE(passed_B) << "In: " << tensor_B_in << "\n, Out:\n" << tensor_B_out; + if (!passed_B) { + this->template write(std::cout, params_B); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemmFragmentStream, half_32x32x16_col_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 32, 32>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_128x64x16_col_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 64, 128>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_256x128x16_col_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 1 + >().run(); + + TestGemmFragmentStream< + 
cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + 64, + 2 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + 128, + 4 + >().run(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemmFragmentStream, half_32x32x16_col_col) { + + TestGemmFragmentStream< + cutlass::Shape<16, 32, 32>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_128x64x16_col_col) { + + TestGemmFragmentStream< + cutlass::Shape<16, 64, 128>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_256x128x16_col_col) { + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + 32, + 1 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + 64, + 2 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + 128, + 4 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kColumnMajor, + 128, + 8 + >().run(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemmFragmentStream, half_32x32x16_row_col) { + + TestGemmFragmentStream< + cutlass::Shape<16, 32, 32>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_128x64x16_row_col) { + + TestGemmFragmentStream< + cutlass::Shape<16, 64, 128>, + 
cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_256x128x16_row_col) { + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + 32, + 2 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + 64, + 4 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + 128, + 8 + >().run(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemmFragmentStream, half_32x32x16_row_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 32, 32>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_128x64x16_row_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 64, 128>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 2 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half_256x128x16_row_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 2 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 64, + 4 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 128, + 8 + >().run(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemmFragmentStream, half4_32x32x16_row_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 32, 32>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 4 + 
>().run(); +} + +TEST(WmmaGemmFragmentStream, half4_128x64x16_row_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 64, 128>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 4 + >().run(); +} + +TEST(WmmaGemmFragmentStream, half4_256x128x16_row_row) { + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 32, + 4 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 64, + 4 + >().run(); + + TestGemmFragmentStream< + cutlass::Shape<16, 128, 256>, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kRowMajor, + 128, + 8 + >().run(); +} + +#endif diff --git a/tools/test/unit/gemm/wmma_gemm_multiply_add.cu b/tools/test/unit/gemm/wmma_gemm_multiply_add.cu new file mode 100644 index 000000000..9ca4df9fa --- /dev/null +++ b/tools/test/unit/gemm/wmma_gemm_multiply_add.cu @@ -0,0 +1,629 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#include "cutlass/wmma_matrix.h" + +#ifdef CUTLASS_USE_WMMA_API + +#include "cutlass_unit_tests.h" +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/util/half.h" + +#include "cutlass/gemm/gemm_global_stream.h" +#include "cutlass/gemm/gemm_shared_stream.h" +#include "cutlass/gemm/wmma_gemm_multiply_add.h" +#include "cutlass/gemm/wmma_gemm_global_tile.h" +#include "cutlass/gemm/wmma_gemm_shared_tile.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +struct ProblemDesc { + int m, n, k; + inline __device__ ProblemDesc(int m_, int n_, int k_) : m(m_), n(n_), k(k_) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +union SharedStorage { + // Storage to store the data. + typename StoreIterator_::SharedStorage store; + // Storage to load the data. 
+ typename LoadIterator_::SharedStorage load; +}; + +template struct Debug {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template kW_)> +struct ReshapeThreadsA { + typedef cutlass::Shape Threads; +}; + +template +struct ReshapeThreadsA { + typedef cutlass::Shape Threads; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template kH_)> +struct ReshapeThreadsB { + typedef cutlass::Shape Threads; +}; + +template +struct ReshapeThreadsB { + typedef cutlass::Shape Threads; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if 1 +template +static __global__ void kernel_nt(half const *d_a, int lda, half const *d_b, int ldb, float *d_c, + int ldc) { +#if 0 + // The default configuration of threads. + typedef cutlass::Shape<1, Warps_::kCount, 32> Threads_; + // The threads. + typedef typename ReshapeThreadsA::Threads ThreadsA; + // The threads. + typedef typename ReshapeThreadsB::Threads ThreadsB; + // The number of elements loaded per LDG. + int const kScalarsPerLdg = 1; + // The tile for A. + typedef cutlass::Shape<1, OutputTile_::kD, OutputTile_::kW> TileA; + // The tile for B. + typedef cutlass::Shape<1, OutputTile_::kD, OutputTile_::kH> TileB; + // The tile for C. + typedef cutlass::Shape<1, Warps_::kH*WmmaShape_::kH, OutputTile_::kW> TileC; +#endif + + // The problem descriptor. + ProblemDesc desc(Traits_::OutputTile::kW, Traits_::OutputTile::kH, Traits::OutputTile::kD); + + // The elements computed by a single warp. + typedef typename cutlass::ShapeDiv::Shape AccumulatorsPerWarp; + + // Global memory load for A. + typedef cutlass::gemm::GemmGlobalIteratorAb< + cutlass::gemm::GemmGlobalIteratorTraits< + cutlass::GemmOperand::kA, cutlass::MatrixLayout::kColumnMajor, half const, TileA, ThreadsA, kScalarsPerLdg> + > + GlobalLoadIteratorA; + + // Shared store iterator for A. 
+ typedef cutlass::gemm::GemmSharedStoreIteratorAb< + cutlass::gemm::GemmSharedStoreIteratorAbTraits< + half, TileA, ThreadsA, kScalarsPerLdg> + > + SharedStoreIteratorA; + + // The global stream for A. + typedef cutlass::gemm::GlobalLoadStream< + GlobalLoadIteratorA, + cutlass::Copy, + SharedStoreIteratorA> + GlobalLoadStreamA; + + // Shared load iterator for A. + typedef cutlass::gemm::WmmaGemmSharedLoadIteratorA< + cutlass::gemm::WmmaGemmSharedLoadIteratorAbTraits< + cutlass::GemmOperand::kA, + cutlass::MatrixLayout::kColumnMajor, half, OutputTile_, Warps_, WmmaShape_> > + SharedLoadIteratorA; + + // Global memory load for B. + typedef cutlass::gemm::GemmGlobalIteratorAb< + cutlass::gemm::GemmGlobalIteratorTraits< + cutlass::GemmOperand::kB, cutlass::MatrixLayout::kRowMajor, half const, TileB, ThreadsB, kScalarsPerLdg> > + GlobalLoadIteratorB; + + // Shared store iterator for B. + typedef cutlass::gemm::GemmSharedStoreIteratorAb< + cutlass::gemm::GemmSharedStoreIteratorAbTraits< + half, TileB, ThreadsB, kScalarsPerLdg> > + SharedStoreIteratorB; + + // The global stream for B. + typedef cutlass::gemm::GlobalLoadStream, + SharedStoreIteratorB> + GlobalLoadStreamB; + + // Shared load iterator for B. + typedef cutlass::gemm::WmmaGemmSharedLoadIteratorB< + cutlass::gemm::WmmaGemmSharedLoadIteratorAbTraits< + cutlass::GemmOperand::kB, + cutlass::MatrixLayout::kRowMajor, half, OutputTile_, Warps_, WmmaShape_> > + SharedLoadIteratorB; + + // Share memory to exchange data for A. + __shared__ SharedStorage shared_storage_a; + + // Share memory to exchange data for B. + __shared__ SharedStorage shared_storage_b; + + // Iterator to load A. + typename GlobalLoadStreamA::Params global_params_a; + global_params_a.initialize(desc, d_a, lda); + GlobalLoadStreamA global_load_a(global_params_a, shared_storage_a.store, desc.m, desc.n, desc.k, + cutlass::make_Coord(0, 0, 0)); + + // Iterator to load B. 
+ typename GlobalLoadStreamB::Params global_params_b; + global_params_b.initialize(desc, d_b, ldb); + GlobalLoadStreamB global_load_b(global_params_b, shared_storage_b.store, desc.m, desc.n, desc.k, + cutlass::make_Coord(0, 0, 0)); + + // Load A/B. + global_load_a.copy(); + global_load_b.copy(); + + // Copy to shared memory. + global_load_a.commit(); + global_load_b.commit(); + + // Make sure the data is in shared memory. + __syncthreads(); + + // Load iterator A. + typename SharedLoadIteratorA::Params shared_params_a; + shared_params_a.initialize(desc); + SharedLoadIteratorA shared_load_a(shared_params_a, shared_storage_a.load); + + // Load iterator B. + typename SharedLoadIteratorB::Params shared_params_b; + shared_params_b.initialize(desc); + SharedLoadIteratorB shared_load_b(shared_params_b, shared_storage_b.load); + + // Copy A from shared memory. + typename SharedLoadIteratorA::Fragment fragment_a; + cutlass::gemm::load_shared(shared_load_a, fragment_a); + + // Copy B from shared memory. + typename SharedLoadIteratorB::Fragment fragment_b; + cutlass::gemm::load_shared(shared_load_b, fragment_b); + + // The functor to do WMMA. + typedef cutlass::gemm::WmmaGemmMultiplyAdd< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + float, + AccumulatorsPerWarp, + WmmaShape_> WmmaGemmMultiplyAdd; + + // The output fragment. + typename WmmaGemmMultiplyAdd::Accumulators fragment_c; + fragment_c.clear(); + + // Do the WMMA. + WmmaGemmMultiplyAdd multiply_add; + multiply_add.multiply_add(fragment_a, fragment_b, fragment_c, fragment_c); + + // Global memory stream to store D. + typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< + cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< + float, TileC, ThreadsA, 1> + > + GlobalStoreIteratorD; + typedef cutlass::gemm::GlobalStoreStream GlobalStoreStreamD; + + // The shared memory to store D. 
+ __shared__ typename GlobalStoreStreamD::SharedStorage shared_storage_stream_d; + + // Iterator to store C. + typename GlobalStoreStreamD::Params global_params_d; + global_params_d.initialize(desc, d_c, ldc); + GlobalStoreStreamD global_store_d(global_params_d, shared_storage_stream_d, desc.m, desc.n, desc.k, + cutlass::make_Coord(0, 0, 0)); + + // Shared store iterator/stream for C. + typedef cutlass::gemm::WmmaGemmSharedStoreIteratorD< + cutlass::gemm::WmmaGemmSharedStoreIteratorDTraits< + cutlass::MatrixLayout::kColumnMajor, float, OutputTile_, Warps_, WmmaShape_> > + SharedStoreIteratorD; + typedef cutlass::gemm::SharedStoreStream SharedStoreStreamD; + + // Shared load iterator/stream for D. + typedef cutlass::gemm::WmmaGemmSharedLoadIteratorD< + cutlass::gemm::WmmaGemmSharedLoadIteratorDTraits< + float, typename SharedStoreIteratorD::Tile, ThreadsA, 1> > + SharedLoadIteratorD; + typedef cutlass::gemm::SharedLoadStream SharedLoadStreamD; + + // The shared memory structure to swizzle D. + union SharedStorageD { + typename SharedStoreStreamD::SharedStorage store; + typename SharedLoadStreamD::SharedStorage load; + }; + + // The shared memory for D. + __shared__ SharedStorageD shared_storage_d; + + // Store iterator D. + typename SharedStoreStreamD::Params shared_store_params_d; + shared_store_params_d.initialize(); + + // Store iterator D. + typename SharedLoadStreamD::Params shared_load_params_d; + shared_load_params_d.initialize(); + + // The number of WMMA in the tile H/W dimension (N/M in GEMM). + int const kWmmaPerH = OutputTile_::kH / Warps_::kH / WmmaShape_::kH; + int const kWmmaPerW = OutputTile_::kW / Warps_::kW / WmmaShape_::kW; + + // Iterate over the data. + for (int i = 0; i < kWmmaPerH; ++i) { + // Make sure the shared memory can be written to. + __syncthreads(); + + // Create the iterator to store to SMEM. 
+ SharedStoreStreamD shared_store_d(shared_store_params_d, + shared_storage_d.store, + fragment_c, + i*kWmmaPerW); + shared_store_d.copy(); + shared_store_d.commit(); + + // Make sure the shared memory was written. + __syncthreads(); + + // Create the iterator to load from SMEM. + SharedLoadStreamD shared_load_d(shared_load_params_d, shared_storage_d.load); + shared_load_d.copy(); + shared_load_d.commit(); + + // Copy the data. + cutlass::Copy copy; + copy.transform(shared_load_d.fragment(), global_store_d.fragment()); + + // Copy the data to global memory. + global_store_d.copy(); + global_store_d.commit(); + } +} +#else +template +static __global__ void kernel_nt(half const *d_a, int lda, half const *d_b, int ldb, float *d_c, + int ldc) { + // The default configuration of threads. + typedef cutlass::Shape<1, Warps_::kCount, 32> Threads_; + // The threads. + typedef typename ReshapeThreadsA::Threads ThreadsA; + // The threads. + typedef typename ReshapeThreadsB::Threads ThreadsB; + // The number of elements loaded per LDG. + int const kScalarsPerLdg = 1; + // The tile for A. + typedef cutlass::Shape<1, OutputTile_::kD, OutputTile_::kW> TileA; + // The tile for B. + typedef cutlass::Shape<1, OutputTile_::kD, OutputTile_::kH> TileB; + // The tile for C. + typedef cutlass::Shape<1, Warps_::kH*WmmaShape_::kH, OutputTile_::kW> TileC; + + // The problem descriptor. + ProblemDesc desc(OutputTile_::kW, OutputTile_::kH, OutputTile_::kD); + + // The elements computed by a single warp. + typedef typename cutlass::ShapeDiv::Shape AccumulatorsPerWarp; + + // Global memory load for A. + typedef cutlass::gemm::GemmGlobalIteratorAb< + cutlass::gemm::GemmGlobalIteratorTraits< + cutlass::GemmOperand::kA, cutlass::MatrixLayout::kColumnMajor, half const, TileA, ThreadsA, kScalarsPerLdg> + > + GlobalLoadIteratorA; + + // Shared store iterator for A. 
+ typedef cutlass::gemm::GemmSharedStoreIteratorAb< + cutlass::gemm::GemmSharedStoreIteratorAbTraits< + half, TileA, ThreadsA, kScalarsPerLdg> + > + SharedStoreIteratorA; + + // The global stream for A. + typedef cutlass::gemm::GlobalLoadStream< + GlobalLoadIteratorA, + cutlass::Copy, + SharedStoreIteratorA> + GlobalLoadStreamA; + + // Shared load iterator for A. + typedef cutlass::gemm::WmmaGemmSharedLoadIteratorA< + cutlass::gemm::WmmaGemmSharedLoadIteratorAbTraits< + cutlass::GemmOperand::kA, + cutlass::MatrixLayout::kColumnMajor, half, OutputTile_, Warps_, WmmaShape_> > + SharedLoadIteratorA; + + // Global memory load for B. + typedef cutlass::gemm::GemmGlobalIteratorAb< + cutlass::gemm::GemmGlobalIteratorTraits< + cutlass::GemmOperand::kB, cutlass::MatrixLayout::kRowMajor, half const, TileB, ThreadsB, kScalarsPerLdg> > + GlobalLoadIteratorB; + + // Shared store iterator for B. + typedef cutlass::gemm::GemmSharedStoreIteratorAb< + cutlass::gemm::GemmSharedStoreIteratorAbTraits< + half, TileB, ThreadsB, kScalarsPerLdg> > + SharedStoreIteratorB; + + // The global stream for B. + typedef cutlass::gemm::GlobalLoadStream, + SharedStoreIteratorB> + GlobalLoadStreamB; + + // Shared load iterator for B. + typedef cutlass::gemm::WmmaGemmSharedLoadIteratorB< + cutlass::gemm::WmmaGemmSharedLoadIteratorAbTraits< + cutlass::GemmOperand::kB, + cutlass::MatrixLayout::kRowMajor, half, OutputTile_, Warps_, WmmaShape_> > + SharedLoadIteratorB; + + // Share memory to exchange data for A. + __shared__ SharedStorage shared_storage_a; + + // Share memory to exchange data for B. + __shared__ SharedStorage shared_storage_b; + + // Iterator to load A. + typename GlobalLoadStreamA::Params global_params_a; + global_params_a.initialize(desc, d_a, lda); + GlobalLoadStreamA global_load_a(global_params_a, shared_storage_a.store, desc.m, desc.n, desc.k, + cutlass::make_Coord(0, 0, 0)); + + // Iterator to load B. 
+ typename GlobalLoadStreamB::Params global_params_b; + global_params_b.initialize(desc, d_b, ldb); + GlobalLoadStreamB global_load_b(global_params_b, shared_storage_b.store, desc.m, desc.n, desc.k, + cutlass::make_Coord(0, 0, 0)); + + // Load A/B. + global_load_a.copy(); + global_load_b.copy(); + + // Copy to shared memory. + global_load_a.commit(); + global_load_b.commit(); + + // Make sure the data is in shared memory. + __syncthreads(); + + // Load iterator A. + typename SharedLoadIteratorA::Params shared_params_a; + shared_params_a.initialize(desc); + SharedLoadIteratorA shared_load_a(shared_params_a, shared_storage_a.load); + + // Load iterator B. + typename SharedLoadIteratorB::Params shared_params_b; + shared_params_b.initialize(desc); + SharedLoadIteratorB shared_load_b(shared_params_b, shared_storage_b.load); + + // Copy A from shared memory. + typename SharedLoadIteratorA::Fragment fragment_a; + cutlass::gemm::load_shared(shared_load_a, fragment_a); + + // Copy B from shared memory. + typename SharedLoadIteratorB::Fragment fragment_b; + cutlass::gemm::load_shared(shared_load_b, fragment_b); + + // The functor to do WMMA. + typedef cutlass::gemm::WmmaGemmMultiplyAdd< + cutlass::MatrixLayout::kColumnMajor, + cutlass::MatrixLayout::kRowMajor, + cutlass::MatrixLayout::kColumnMajor, + float, + AccumulatorsPerWarp, + WmmaShape_> WmmaGemmMultiplyAdd; + + // The output fragment. + typename WmmaGemmMultiplyAdd::Accumulators fragment_c; + fragment_c.clear(); + + // Do the WMMA. + WmmaGemmMultiplyAdd multiply_add; + multiply_add.multiply_add(fragment_a, fragment_b, fragment_c, fragment_c); + + // Global memory stream to store D. + typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< + cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< + float, TileC, ThreadsA, 1> + > + GlobalStoreIteratorD; + typedef cutlass::gemm::GlobalStoreStream GlobalStoreStreamD; + + // The shared memory to store D. 
+ __shared__ typename GlobalStoreStreamD::SharedStorage shared_storage_stream_d; + + // Iterator to store C. + typename GlobalStoreStreamD::Params global_params_d; + global_params_d.initialize(desc, d_c, ldc); + GlobalStoreStreamD global_store_d(global_params_d, shared_storage_stream_d, desc.m, desc.n, desc.k, + cutlass::make_Coord(0, 0, 0)); + + // Shared store iterator/stream for C. + typedef cutlass::gemm::WmmaGemmSharedStoreIteratorD< + cutlass::gemm::WmmaGemmSharedStoreIteratorDTraits< + cutlass::MatrixLayout::kColumnMajor, float, OutputTile_, Warps_, WmmaShape_> > + SharedStoreIteratorD; + typedef cutlass::gemm::SharedStoreStream SharedStoreStreamD; + + // Shared load iterator/stream for D. + typedef cutlass::gemm::WmmaGemmSharedLoadIteratorD< + cutlass::gemm::WmmaGemmSharedLoadIteratorDTraits< + float, typename SharedStoreIteratorD::Tile, ThreadsA, 1> > + SharedLoadIteratorD; + typedef cutlass::gemm::SharedLoadStream SharedLoadStreamD; + + // The shared memory structure to swizzle D. + union SharedStorageD { + typename SharedStoreStreamD::SharedStorage store; + typename SharedLoadStreamD::SharedStorage load; + }; + + // The shared memory for D. + __shared__ SharedStorageD shared_storage_d; + + // Store iterator D. + typename SharedStoreStreamD::Params shared_store_params_d; + shared_store_params_d.initialize(); + + // Store iterator D. + typename SharedLoadStreamD::Params shared_load_params_d; + shared_load_params_d.initialize(); + + // The number of WMMA in the tile H/W dimension (N/M in GEMM). + int const kWmmaPerH = OutputTile_::kH / Warps_::kH / WmmaShape_::kH; + int const kWmmaPerW = OutputTile_::kW / Warps_::kW / WmmaShape_::kW; + + // Iterate over the data. + for (int i = 0; i < kWmmaPerH; ++i) { + // Make sure the shared memory can be written to. + __syncthreads(); + + // Create the iterator to store to SMEM. 
+ SharedStoreStreamD shared_store_d(shared_store_params_d, + shared_storage_d.store, + fragment_c, + i*kWmmaPerW); + shared_store_d.copy(); + shared_store_d.commit(); + + // Make sure the shared memory was written. + __syncthreads(); + + // Create the iterator to load from SMEM. + SharedLoadStreamD shared_load_d(shared_load_params_d, shared_storage_d.load); + shared_load_d.copy(); + shared_load_d.commit(); + + // Copy the data. + cutlass::Copy copy; + copy.transform(shared_load_d.fragment(), global_store_d.fragment()); + + // Copy the data to global memory. + global_store_d.copy(); + global_store_d.commit(); + } +} +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +void run() { + /// Testbed type. + typedef test::GemmTestbed GemmTestbed; + + // Create the testbed. + GemmTestbed testbed(OutputTile_::kW, // M + OutputTile_::kH, // N + OutputTile_::kD, // K + cutlass::convert(cutlass::MatrixLayout::kColumnMajor), + cutlass::convert(cutlass::MatrixLayout::kRowMajor), 1, 0, + CUBLAS_GEMM_DEFAULT_TENSOR_OP, + cutlass::convert(cutlass::MatrixLayout::kColumnMajor)); + + // Initialize. + testbed.initialize(); + + // Launch the kernel. + kernel_nt<<<1, 32*Warps_::kCount>>>( + testbed.ptr_A(), testbed.lda(), + testbed.ptr_B(), testbed.ldb(), + testbed.ptr_computed(), testbed.ldc()); + ASSERT_EQ(cudaSuccess, cudaGetLastError()); + + // Make sure it worked as expected. 
+ ASSERT_TRUE(testbed.verify_with_host()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_16x16x16_16x16x16) { + run, cutlass::Shape<1, 1, 1>, cutlass::Shape<16, 16, 16> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_16x32x16_16x16x16) { + run, cutlass::Shape<1, 1, 1>, cutlass::Shape<16, 16, 16> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_32x16x16_16x16x16) { + run, cutlass::Shape<1, 1, 1>, cutlass::Shape<16, 16, 16> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_64x16x16_16x16x16) { + run, cutlass::Shape<1, 1, 1>, cutlass::Shape<16, 16, 16> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_64x64x16_16x16x16) { + run, cutlass::Shape<1, 1, 1>, cutlass::Shape<16, 16, 16> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_128x128x16_16x16x16) { + run, cutlass::Shape<1, 2, 2>, cutlass::Shape<16, 16, 16> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_32x8x16_32x8x16) { + run, cutlass::Shape<1, 1, 1>, cutlass::Shape<16, 8, 32> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_128x128x16_32x8x16) { + run, cutlass::Shape<1, 2, 2>, cutlass::Shape<16, 8, 32> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, 
multiply_add_f32_8x32x16_8x32x16) { + run, cutlass::Shape<1, 1, 1>, cutlass::Shape<16, 32, 8> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaGemm, multiply_add_f32_128x128x16_8x32x16) { + run, cutlass::Shape<1, 2, 2>, cutlass::Shape<16, 32, 8> >(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/tools/test/unit/gemm/wmma_integer_gemm.cu b/tools/test/unit/gemm/wmma_integer_gemm.cu new file mode 100644 index 000000000..857408c86 --- /dev/null +++ b/tools/test/unit/gemm/wmma_integer_gemm.cu @@ -0,0 +1,630 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include "cutlass/wmma_matrix.h" +#ifdef CUTLASS_USE_SUBBYTE_WMMA + +#include "cutlass_unit_test.h" +#include "cutlass/gemm/gemm.h" +#include "cutlass/gemm/wmma_gemm_traits.h" + +#include "tools/test/unit/gemm/gemm_testbed.h" +#include "tools/test/unit/gemm/integer_gemm.h" + +/* + TEST(TestGroup, TestName) + + - TestGroup should follow this template: + WmmaIntegerGemm____ + + - TestName should follow this template + wmma_integer_gemm__{optional additional specifier(s)} + + - Shapes should be specified as MxNxK (opposite to the Shape<> definition which is KxNxM) +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// S4 Integer GEMM Unit Tests +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt4Gemm_32x32x64_8x8x32_s4, wmma_integer_gemm_32x32x64) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<64, 32, 32>, + cutlass::Shape<32, 8, 8>, + 8, + 8> + WmmaGemmTraits; + run_integer_gemm(32, 32, 64); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt4Gemm_32x32x256_8x8x32_s4, wmma_integer_gemm_128x128x256) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, 
+ cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<256, 32, 32>, + cutlass::Shape<32, 8, 8>, + 32, + 32> + WmmaGemmTraits; + run_integer_gemm(128, 128, 256); +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// U4 Integer GEMM Unit Tests +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt4Gemm_32x32x64_8x8x32_u4, wmma_integer_gemm_32x32x64) { + + typedef cutlass::gemm::WmmaGemmTraits, + cutlass::Vector, + cutlass::Vector, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<64, 32, 32>, + cutlass::Shape<32, 8, 8>, + 8, + 8> + WmmaGemmTraits; + run_integer_gemm(32, 32, 64); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// S8 Integer GEMM Unit Tests +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// 16x16x16 +// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_s8_tn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_s8_tt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_s8_nt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed 
char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_s8_nn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// 32x8x16 +// + +TEST(WmmaInt8Gemm_32x32x32_32x8x16_s8_tn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_32x8x16_s8_tt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_32x8x16_s8_nt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + 
+TEST(WmmaInt8Gemm_32x32x32_32x8x16_s8_nn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// 8x32x16 +// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_s8_tn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_s8_tt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_s8_nt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_s8_nn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + signed char, + signed char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + 
run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// U8 Integer GEMM Unit Tests +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// 16x16x16 +// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_u8_tn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_u8_tt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_u8_nt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_16x16x16_u8_nn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 16, 16>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// 32x8x16 +// + +TEST(WmmaInt8Gemm_32x32x32_32x8x16_u8_tn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_32x8x16_u8_tt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_32x8x16_u8_nt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_32x8x16_u8_nn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 8, 32>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// 8x32x16 +// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_u8_tn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + 
int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_u8_tt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_u8_nt, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(WmmaInt8Gemm_32x32x32_8x32x16_u8_nn, wmma_integer_gemm_32x32x32) { + + typedef cutlass::gemm::WmmaGemmTraits, + unsigned char, + unsigned char, + int, + cutlass::gemm::LinearScaling, + int, + cutlass::Shape<32, 32, 32>, + cutlass::Shape<16, 32, 8>, + 4, + 4> + WmmaGemmTraits; + run_integer_gemm(32, 32, 32); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif // ifdef CUTLASS_USE_SUBBYTE_WMMA diff --git a/tools/test/unit/util/complex.cu b/tools/test/unit/util/complex.cu new file mode 100644 index 000000000..12d840fdb --- /dev/null +++ b/tools/test/unit/util/complex.cu @@ -0,0 +1,102 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#include + +#include "cutlass_unit_test.h" +#include "cutlass/util/complex.h" +#include "tools/util/half.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace test { + + /// Thorough testing for basic complex math operators. Uses std::complex as a reference. 
+ template + struct ComplexOperators { + ComplexOperators() { + for (int ar = -N; ar <= N; ++ar) { + for (int ai = -N; ai <= N; ++ai) { + for (int br = -N; br <= N; ++br) { + for (int bi = -N; bi <= N; ++bi) { + + cutlass::platform::complex Ae(T(ar) / T(M), T(ai) / T(M)); + cutlass::platform::complex Be(T(br) / T(M), T(bi) / T(M)); + + std::complex Ar(T(ar) / T(M), T(ai) / T(M)); + std::complex Br(T(br) / T(M), T(bi) / T(M)); + + cutlass::platform::complex add_e = Ae + Be; + cutlass::platform::complex sub_e = Ae - Be; + cutlass::platform::complex mul_e = Ae * Be; + + std::complex add_r = (Ar + Br); + std::complex sub_r = (Ar - Br); + std::complex mul_r = (Ar * Br); + + EXPECT_EQ(real(add_e), real(add_r)); + EXPECT_EQ(imag(add_e), imag(add_r)); + + EXPECT_EQ(real(sub_e), real(sub_r)); + EXPECT_EQ(imag(sub_e), imag(sub_r)); + + EXPECT_EQ(real(mul_e), real(mul_r)); + EXPECT_EQ(imag(mul_e), imag(mul_r)); + + if (!(br == 0 && bi == 0)) { + + cutlass::platform::complex div_e = Ae / Be; + std::complex div_r = Ar / Br; + + EXPECT_EQ(real(div_e), real(div_r)); + EXPECT_EQ(imag(div_e), imag(div_r)); + } + } + } + } + } + } + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Complex, host_float) { + test::ComplexOperators test; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Complex, host_double) { + test::ComplexOperators test; +} + +/////////////////////////////////////////////////////////////////////////////////////// + +TEST(Complex, host_half) { + // Fewer test cases since half_t is emulated + test::ComplexOperators test; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/util/host_tensor.cu b/tools/test/unit/util/host_tensor.cu index aff6648aa..ce3b22489 100644 --- a/tools/test/unit/util/host_tensor.cu +++ b/tools/test/unit/util/host_tensor.cu @@ -1,66
+1,342 @@ -/****************************************************************************** -* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are not permitted. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY -* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -* -******************************************************************************/ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/* \file -/*! \file - \brief Tests for Host_tensor, Host_tensor_view, and Tensor_view + \brief Defines unit tests for HostTensor and HostMatrix. + + HostTensor is a utility class for allocating memory on the host and on the selected CUDA device + and presenting a TensorView of this memory. + + HostMatrix is new in CUTLASS 1.1 that offers a matrix-like interface to a HostTensor with rank 2. + Several examples are shown in this source file. 
*/ -//#include -#include -#include -#include +#include "cutlass_unit_test.h" -/// Random number generator -struct RandomGenerator { - RandomGenerator(int seed = 17) { - srand(seed); - } +#include "cutlass/matrix_traits.h" - float operator()() { - return float(rand() % 64) / 8.0f; - } -}; +#include "tools/util/tensor_view_io.h" +#include "tools/util/host_tensor.h" +#include "tools/util/host_matrix.h" -TEST(HostTensor, gemm) { +//////////////////////////////////////////////////////////////////////////////////////////////////// - int const M = 16; - int const N = 16; - int const K = 16; +namespace test { - typedef cutlass::HostTensor HostTensor; +/// Kernel to compute a thread's unique coordinate within a CUDA kernel grid and write a value +/// using a CUTLASS TensorView. +template +__global__ void fill_sequential(TensorView view) { - // allocate a host tensor - HostTensor A( - cutlass::make_Coord(1, K, M, 1) - ); + // Compute the thread's coordinate in the 2D CUDA kernel grid + cutlass::Coord<2> coord = cutlass::make_Coord( + blockIdx.x * blockDim.x + threadIdx.x, + blockIdx.y * blockDim.y + threadIdx.y + ); - HostTensor B( - cutlass::make_Coord(1, N, K, 1) - ); - - HostTensor C( - cutlass::make_Coord(1, N, M, 1) - ); - - A.fill_random(RandomGenerator()); - B.fill_random(RandomGenerator()); - - C.gemm(A, B, 1.0f, 0.0f); + // Write a value into the view + if (view.contains(coord)) { + view.at(coord) = coord[0] + view.size(0) * coord[1]; + } } +} // namespace test + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// This test constructs a CUTLASS HostTensor with column-major layout. 
+TEST(HostTensor, fill_sequential_column_major) { + + int const M = 16; + int const N = 32; + + cutlass::Coord<2> bounds = cutlass::make_Coord(M, N); + + // Construct a rank=2 host tensor of size M-by-N with leading dimension M + cutlass::HostTensor< + int, + 2, + cutlass::MatrixLayout::ColumnMajor> host_tensor(cutlass::make_Coord(M, 1), bounds); + + // Fill it with zeros and synchronize device + host_tensor.fill(0); + host_tensor.sync_device(); + + // Launch a CUDA kernel by obtaining a TensorView of the device memory + dim3 block(16, 16); + dim3 grid((M + block.x - 1) / block.x, (N + block.y - 1) / block.y); + + test::fill_sequential<<< grid, block >>>(host_tensor.device_view()); + + ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess); + + // Synchronize the host data + host_tensor.sync_host(); + + // Verify host_tensor contains sequential elements + int errors = 0; + for (int n = 0; n < N; ++n) { + for (int m = 0; m < M; ++m) { + int expected = m + n * M; + int got = host_tensor.at(cutlass::make_Coord(m, n)); + if (expected != got) { + ++errors; + } + } + } + + EXPECT_EQ(errors, 0) << std::setw(4) << host_tensor << std::endl; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// This test constructs a CUTLASS HostTensor with column-major interleaved layout +TEST(HostTensor, fill_sequential_column_major_interleaved) { + + int const M = 16; + int const N = 16; + int const kInterleave = 4; + + cutlass::Coord<2> bounds = cutlass::make_Coord(M, N); + + // Define a mapping function for column-major interleaved layout + typedef cutlass::MatrixLayout::ColumnMajorInterleaved TensorRefMapFunc; + + // Construct a rank=2 host tensor of size M-by-N + cutlass::HostTensor< + int, + 2, + TensorRefMapFunc > host_tensor(TensorRefMapFunc::stride(M), bounds); + + // Fill it with zeros and synchronize device + host_tensor.fill(0); + host_tensor.sync_device(); + + // Launch a CUDA kernel by obtaining a TensorView of the device 
memory + dim3 block(16, 16); + dim3 grid((M + block.x - 1) / block.x, (N + block.y - 1) / block.y); + + test::fill_sequential<<< grid, block >>>(host_tensor.device_view()); + + ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess); + + // Synchronize the host data + host_tensor.sync_host(); + + // Verify host_tensor contains sequential elements + int errors = 0; + for (int n = 0; n < N; ++n) { + for (int m = 0; m < M; ++m) { + int expected = m + n * M; + int got = host_tensor.at(cutlass::make_Coord(m, n)); + if (got != expected) { + ++errors; + } + } + } + + EXPECT_EQ(errors, 0) << std::setw(4) << host_tensor << std::endl; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// cutlass::HostMatrix extends cutlass::HostTensor of rank=2 to facilitate allocating and operating +// on matrices in device memory. +// +// cutlass::HostMatrix accommodates both row-major and column-major matrices with a single +// leading dimension. +// +// The first test demonstrates use of HostMatrix<> in the same circumstances as HostTensor but with +// simplifications to the calling interface. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// This test constructs a CUTLASS cutlass::HostMatrix with column-major layout.
+TEST(HostMatrix, fill_sequential_column_major) { + + int const M = 16; + int const N = 32; + int const ldm = M + 2; // define leading dimension with padding + + cutlass::Coord<2> bounds = cutlass::make_Coord(M, N); + + // Construct a HostMatrix of size M-by-N with leading dimension ldm + cutlass::HostMatrix host_matrix(bounds, cutlass::MatrixLayout::kColumnMajor, ldm); + + // Fill it with zeros and synchronize device + host_matrix.fill(0); + host_matrix.sync_device(); + + // Launch a CUDA kernel by obtaining a TensorView of the device memory + dim3 block(16, 16); + dim3 grid((M + block.x - 1) / block.x, (N + block.y - 1) / block.y); + + test::fill_sequential<<< grid, block >>>(host_matrix.device_view()); + + ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess); + + // Synchronize the host data + host_matrix.sync_host(); + + // Verify host_matrix contains sequential elements + int errors = 0; + for (int n = 0; n < N; ++n) { + for (int m = 0; m < M; ++m) { + int expected = m + n * M; + int got = host_matrix.at(cutlass::make_Coord(m, n)); + if (expected != got) { + ++errors; + } + } + } + + EXPECT_EQ(errors, 0) << std::setw(4) << host_matrix << std::endl; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Previously, cutlass::HostTensorView<> offered a gemm() method defined for the H and W dimensions. +// The other dimensions were ignored. +// +// To improve the interface, we have moved this into the HostMatrixView<> and HostMatrix<> +// classes which require rank=2. To accommodate matrix operands of differing layout, we have extracted +// the host-side GEMM implementation into cutlass::reference::host::Gemm() which can compute the +// general matrix product of matrices with arbitrary layout. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// This test constructs a CUTLASS cutlass::HostMatrix with column-major layout.
+TEST(HostMatrix, gemm) { + + // Problem size intentionally small, as reference check has complexity O(MNK). + int const M = 32; + int const N = 16; + int const K = 4; + + int const lda = M; + int const ldb = N; + int const ldc = M; + + // Construct matrix operands + cutlass::HostMatrix A(cutlass::make_Coord(M, K), cutlass::MatrixLayout::kColumnMajor, lda); + cutlass::HostMatrix B(cutlass::make_Coord(K, N), cutlass::MatrixLayout::kRowMajor, ldb); + cutlass::HostMatrix C(cutlass::make_Coord(M, N), cutlass::MatrixLayout::kColumnMajor, ldc); + + A.fill_sequential(); + B.fill_sequential(); + C.fill(0); + + int alpha = 1; + + // Compute host-side GEMM reference + cutlass::reference::host::Gemm( + cutlass::gemm::GemmCoord(K, N, M), + alpha, + A.host_ref(), + B.host_ref(), + int(0), // beta + C.host_ref()); + + // Verify result + int errors = 0; + + // Primitive reference implementation for matrix product + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int result = 0; + for (int k = 0; k < K; ++k) { + result += A.at(cutlass::make_Coord(i, k)) * B.at(cutlass::make_Coord(k, j)); + } + if (C.at(cutlass::make_Coord(i, j)) != alpha * result) { + ++errors; + } + } + } + + EXPECT_EQ(errors, 0) << "GEMM error\n" + << "A =\n" << A << "\nB = \n" << B << "\nC =\n" << C << "\n"; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// When layout is known at compile time, we may use the corresponding helper classes to simplify +// matrix instantiation. The matrix layout becomes part of the type which reduces the StorageRank +// of the internal stride vector. +// +// Apart from specifying the matrix layout at compile time, this test is functionally identical to +// HostMatrix.gemm. +// +TEST(HostMatrix, gemm_compile_time_layout) { + + // Problem size intentionally small, as reference check has complexity O(MNK).
+ int const M = 32; + int const N = 16; + int const K = 4; + + int const lda = M; + int const ldb = N; + int const ldc = M; + + // Construct matrix operands + cutlass::HostMatrixColumnMajor A(cutlass::make_Coord(M, K), lda); + cutlass::HostMatrixRowMajor B(cutlass::make_Coord(K, N), ldb); + cutlass::HostMatrixColumnMajor C(cutlass::make_Coord(M, N), ldc); + + A.fill_sequential(); + B.fill_sequential(); + C.fill(0); + + int alpha = 1; + + // Compute host-side GEMM reference + cutlass::reference::host::Gemm( + cutlass::gemm::GemmCoord(K, N, M), + alpha, + A.host_ref(), + B.host_ref(), + int(0), // beta + C.host_ref()); + + // Verify result + int errors = 0; + + // Primitive reference implementation for matrix product + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int result = 0; + for (int k = 0; k < K; ++k) { + result += A.at(cutlass::make_Coord(i, k)) * B.at(cutlass::make_Coord(k, j)); + } + if (C.at(cutlass::make_Coord(i, j)) != alpha * result) { + ++errors; + } + } + } + + EXPECT_EQ(errors, 0) << "GEMM error\n" + << "A =\n" << A << "\nB = \n" << B << "\nC =\n" << C << "\n"; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/util/tensor_elementwise.cu b/tools/test/unit/util/tensor_elementwise.cu new file mode 100644 index 000000000..a983a4f4c --- /dev/null +++ b/tools/test/unit/util/tensor_elementwise.cu @@ -0,0 +1,324 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/* \file + + \brief + + These tests initialize host- and device-side tensors according to several random distributions. 
+*/ + +#include "cutlass_unit_test.h" + +#include "cutlass/matrix_traits.h" + +#include "tools/util/tensor_view_io.h" +#include "tools/util/host_tensor.h" +#include "tools/util/host_matrix.h" + +#include "tools/util/reference/device/tensor_foreach.h" +#include "tools/util/reference/device/tensor_elementwise.h" + +#include "tools/util/reference/host/tensor_foreach.h" +#include "tools/util/reference/host/tensor_elementwise.h" + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +#define ENABLE_OUTPUT 0 // Suppress output by default. + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorInitialize, uniform_device) { + + // Define the problem size + int const M = 517; + int const N = 117; + + // Define HostMatrix type + typedef cutlass::HostMatrix HostMatrix; + + // Construct the host matrix + HostMatrix source(cutlass::MatrixCoord(M, N), cutlass::MatrixLayout::kRowMajor); + source.fill(0); + + // Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_uniform(0, 128, -1); + + // RNG seed is hard-coded for determinism in the test.
+ unsigned seed = 2080; + + cutlass::reference::device::TensorInitialize(source.device_view(), seed, dist); + + source.sync_host(); + + if (ENABLE_OUTPUT) { + std::ofstream result("TensorInitialize_uniform_device.csv"); + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + result << source.at(cutlass::make_Coord(i, j)) << "\n"; + } + } + } +} + +TEST(TensorInitialize, uniform_host) { + + // Define the problem size + int const M = 517; + int const N = 117; + + bool const kDeviceBacked = false; + + // Define HostMatrix type + typedef cutlass::HostMatrix HostMatrix; + + // Construct the host matrix + HostMatrix source(cutlass::MatrixCoord(M, N), cutlass::MatrixLayout::kRowMajor, kDeviceBacked); + source.fill(0); + + // Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_uniform(0, 128, -1); + + // RNG seed is hard-coded for determinism in the test. + unsigned seed = 2080; + + cutlass::reference::host::TensorInitialize(source.host_view(), seed, dist); + + if (ENABLE_OUTPUT) { + std::ofstream result("TensorInitialize_uniform_host.csv"); + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + result << source.at(cutlass::make_Coord(i, j)) << "\n"; + } + } + } +} + +TEST(TensorInitialize, gaussian_device) { + + // Define the problem size + int const M = 517; + int const N = 117; + + + // Define HostMatrix type + typedef cutlass::HostMatrix HostMatrix; + + // Construct the host matrix + HostMatrix source(cutlass::MatrixCoord(M, N), cutlass::MatrixLayout::kRowMajor); + source.fill(0); + + // Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_gaussian(1, 2, -1); + + // RNG seed is hard-coded for determinism in the test. 
+ unsigned seed = 2080; + + cutlass::reference::device::TensorInitialize(source.device_view(), seed, dist); + + source.sync_host(); + + if (ENABLE_OUTPUT) { + std::ofstream result("TensorInitialize_gaussian_device.csv"); + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + result << source.at(cutlass::make_Coord(i, j)) << "\n"; + } + } + } +} + +TEST(TensorInitialize, gaussian_host) { + // Define the problem size + int const M = 517; + int const N = 117; + + bool const kDeviceBacked = false; + + // Define HostMatrix type + typedef cutlass::HostMatrix HostMatrix; + + // Construct the host matrix + HostMatrix source(cutlass::MatrixCoord(M, N), cutlass::MatrixLayout::kRowMajor, kDeviceBacked); + source.fill(0); + + // Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_gaussian(1, 2, -1); + + // RNG seed is hard-coded for determinism in the test. + unsigned seed = 2080; + + cutlass::reference::host::TensorInitialize(source.host_view(), seed, dist); + + if (ENABLE_OUTPUT) { + std::ofstream result("TensorInitialize_gaussian_host.csv"); + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + result << source.at(cutlass::make_Coord(i, j)) << "\n"; + } + } + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Interleaved matrix layouts +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorInitialize, interleaved_gaussian_device) { + + // Define the problem size + int const M = 512; + int const N = 128; + + // Define a mapping function for column-major interleaved layout + int const kInterleave = 4; + typedef cutlass::MatrixLayout::ColumnMajorInterleaved TensorRefMapFunc; + + // Construct a rank=2 host tensor of size M-by-N + cutlass::HostTensor< + float, + 2, + TensorRefMapFunc > source(TensorRefMapFunc::stride(M), cutlass::make_Coord(M, N)); + + source.fill(0); + + // 
Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_gaussian(1, 2, -1); + + // RNG seed is hard-coded for determinism in the test. + unsigned seed = 2080; + + cutlass::reference::device::TensorInitialize(source.device_view(), seed, dist); + + source.sync_host(); + + if (ENABLE_OUTPUT) { + std::ofstream result("TensorInitialize_interleaved_gaussian_device.csv"); + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + result << source.at(cutlass::make_Coord(i, j)) << "\n"; + } + } + } +} + +TEST(TensorInitialize, interleaved_gaussian_host) { + // Define the problem size + int const M = 512; + int const N = 128; + + bool const kDeviceBacked = false; + + // Define a mapping function for column-major interleaved layout + int const kInterleave = 4; + typedef cutlass::MatrixLayout::ColumnMajorInterleaved TensorRefMapFunc; + + // Construct a rank=2 host tensor of size M-by-N + cutlass::HostTensor< + float, + 2, + TensorRefMapFunc > source(TensorRefMapFunc::stride(M), cutlass::make_Coord(M, N), kDeviceBacked); + + // Construct the host matrix + source.fill(0); + + // Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_gaussian(1, 2, -1); + + // RNG seed is hard-coded for determinism in the test. 
+ unsigned seed = 2080; + + cutlass::reference::host::TensorInitialize(source.host_view(), seed, dist); + + if (ENABLE_OUTPUT) { + std::ofstream result("TensorInitialize_interleaved_gaussian_host.csv"); + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + result << source.at(cutlass::make_Coord(i, j)) << "\n"; + } + } + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Comparison operator +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(TensorEquals, interleaved_device) { + + // Define the problem size + int const M = 512; + int const N = 128; + + // Define a mapping function for column-major interleaved layout + int const kInterleave = 4; + typedef cutlass::MatrixLayout::ColumnMajorInterleaved TensorRefMapFunc; + + // Construct two rank=2 host tensor of size M-by-N + cutlass::HostTensor< + float, + 2, + TensorRefMapFunc > left(TensorRefMapFunc::stride(M), cutlass::make_Coord(M, N)); + + cutlass::HostTensor< + float, + 2, + TensorRefMapFunc > right(TensorRefMapFunc::stride(M), cutlass::make_Coord(M, N)); + + // Initialize + left.fill_sequential(); + right.fill_sequential(); + + // Assert equality + EXPECT_TRUE(cutlass::reference::device::TensorEquals(left.device_view(), right.device_view())); + + // Overwrite one with an unexpected element + left.at(cutlass::make_Coord(24, 17)) = -1; + left.sync_device(); + + // Assert inequality + EXPECT_FALSE(cutlass::reference::device::TensorEquals(left.device_view(), right.device_view())); +} + +TEST(TensorEquals, interleaved_host) { + +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/util/tensor_foreach.cu b/tools/test/unit/util/tensor_foreach.cu new file mode 100644 index 000000000..dcb965987 --- /dev/null +++ b/tools/test/unit/util/tensor_foreach.cu @@ -0,0 +1,217 @@ 
+/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/* \file + + \brief + + These tests are intended to demonstrate the CUTLASS reference implementation for basic for-each + operators on the index space of TensorView objects. 
They instantiate a HostMatrix, initialize + its elements with random data according to specified random distributions, and clamp the + elements using a TensorForEach() operation. + + Both device-side and host-side reference implementations are called. +*/ + +#include "cutlass_unit_test.h" + +#include "cutlass/matrix_traits.h" + +#include "tools/util/tensor_view_io.h" +#include "tools/util/host_tensor.h" +#include "tools/util/host_matrix.h" + +#include "tools/util/reference/device/tensor_foreach.h" +#include "tools/util/reference/device/tensor_elementwise.h" + +#include "tools/util/reference/host/tensor_foreach.h" +#include "tools/util/reference/host/tensor_elementwise.h" + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace test { + +/// Define a functor that computes the ReLu operation on a tensor. +template +struct ReLuFunc { + + /// Coordinate of index space + typedef typename View::TensorCoord TensorCoord; + + /// Scalar type + typedef typename View::Storage T; + + // + // Data members + // + + /// Tensor view + View view; + + /// ReLu threshold + T threshold; + + // + // Methods + // + + /// Constructor + CUTLASS_HOST_DEVICE + ReLuFunc(View const &view, T threshold): view(view), threshold(threshold) { } + + /// ReLu function + CUTLASS_HOST_DEVICE + void operator()(TensorCoord const &coord) { + T value = view.at(coord); + + if (value < threshold) { + value = threshold; + } + + view.at(coord) = value; + } +}; + +} // namespace test + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// This tests models the computation of ReLu using reference utility code. 
+TEST(TensorForEach, ReLu_device) { + + // Define HostMatrix type + typedef cutlass::HostMatrix HostMatrix; + typedef typename HostMatrix::DeviceTensorView View; + + // Define the problem size + int const M = 517; + int const N = 117; + + float threshold = 0; + + // Construct the host matrix + HostMatrix source(cutlass::MatrixCoord(M, N), cutlass::MatrixLayout::kRowMajor); + source.fill(0); + + // Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_uniform(-16, 16); + + // RNG seed is hard-coded for determinism in the test. + int64_t seed = 2080; + + cutlass::reference::device::TensorInitialize(source.device_view(), seed, dist); + + // Define a functor called by TensorForEach<> + typedef test::ReLuFunc ReLuFunc; + + // Instantiate on host with TensorView and threshold value + ReLuFunc relu_func(source.device_view(), threshold); + + // Launch kernel that applies the element-wise operator over the tensor's index space. + cutlass::reference::device::TensorForEach< + ReLuFunc, + View::kRank, + ReLuFunc>(source.size(), relu_func); + + // Verify no element is less than the ReLu threshold. 
+ source.sync_host(); + + int errors = 0; + for (cutlass::MatrixCoord coord(0, 0); coord.row() < M; ++coord.row()) { + for (coord.column() = 0; coord.column() < N; ++coord.column()) { + if (source.at(coord) < threshold) { + ++errors; + if (errors < 10) { + std::cout << "Error - source(" << coord << ") = " + << source.at(coord) << " is less than threshold " << threshold << std::endl; + } + } + } + } + + EXPECT_EQ(errors, 0) + << "Result: " << source; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Test to apply the ReLu operation using host-side utilities +TEST(TensorForEach, ReLu_host) { + + // Define HostMatrix type + typedef cutlass::HostMatrix HostMatrix; + typedef typename HostMatrix::HostTensorView View; + + // Define the problem size + int const M = 517; + int const N = 117; + + float threshold = 0; + + bool const kDeviceBacked = false; + + // Construct the host matrix + HostMatrix source(cutlass::MatrixCoord(M, N), cutlass::MatrixLayout::kRowMajor, kDeviceBacked); + source.fill(0); + + // Initialize the source matrix with a uniform distribution + cutlass::Distribution dist; + dist.set_gaussian(-1, 4); + + // RNG seed is hard-coded for determinism in the test. 
+ unsigned seed = 2080; + + cutlass::reference::host::TensorInitialize(source.host_view(), seed, dist); + + // Define a functor called by TensorForEach<> + typedef test::ReLuFunc ReLuFunc; + + // Instantiate on host with TensorView and threshold value + ReLuFunc relu_func(source.host_view(), threshold); + + // Invoke host-side for-each computation on the tensor + cutlass::reference::host::TensorForEach< + ReLuFunc, + View::kRank, + ReLuFunc>(source.size(), relu_func); + + int errors = 0; + for (cutlass::MatrixCoord coord(0, 0); coord.row() < M; ++coord.row()) { + for (coord.column() = 0; coord.column() < N; ++coord.column()) { + if (source.at(coord) < threshold) { + ++errors; + if (errors < 10) { + std::cout << "Error - source(" << coord << ") = " + << source.at(coord) << " is less than threshold " << threshold << std::endl; + } + } + } + } + + EXPECT_EQ(errors, 0) + << "Result: " << source; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/test/unit/util/unique_ptr.cu b/tools/test/unit/util/unique_ptr.cu new file mode 100644 index 000000000..7676efb5e --- /dev/null +++ b/tools/test/unit/util/unique_ptr.cu @@ -0,0 +1,25 @@ +/****************************************************************************** +* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are not permitted. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +******************************************************************************/ + +#include "cutlass_unit_test.h" +#include "cutlass/util/platform.h" + +TEST(unique_ptr, basic) { + cutlass::platform::unique_ptr ptr(new int); +} diff --git a/tools/util/command_line.h b/tools/util/command_line.h index 8f2b17aa9..d4bb96fea 100644 --- a/tools/util/command_line.h +++ b/tools/util/command_line.h @@ -108,7 +108,7 @@ struct CommandLine { } /** - * Returns the commandline parameter for a given index (not including flags) + * Returns the boolean value specified for a given commandline parameter --= */ void get_cmd_line_argument(const char* arg_name, bool& val, bool _default = true) const { val = _default; @@ -156,27 +156,7 @@ struct CommandLine { for (int i = 0; i < keys.size(); ++i) { if (keys[i] == string(arg_name)) { string val_string(values[i]); - istringstream str_stream(val_string); - string::size_type old_pos = 0; - string::size_type new_pos = 0; - - // Iterate -delimited values - value_t val; - while ((new_pos = val_string.find(sep, old_pos)) != string::npos) { - if (new_pos != old_pos) { - str_stream.width(new_pos - old_pos); - str_stream >> val; - vals.push_back(val); - } - - // skip over delimiter - str_stream.ignore(1); - old_pos = new_pos + 1; - } - - // Read last value - str_stream >> val; - vals.push_back(val); + seperate_string(val_string, vals, sep); } } } @@ -184,7 +164,7 @@ struct CommandLine { /** * Returns the values specified for a given 
commandline parameter - * --=,* + * --=,* */ void get_cmd_line_argument_pairs(const char* arg_name, std::vector >& tokens, @@ -198,6 +178,26 @@ struct CommandLine { } } + /** + * Returns a list of ranges specified for a given commandline parameter + * --=,* + */ + void get_cmd_line_argument_ranges(const char* arg_name, + std::vector >& vals, + char delim = ',', + char sep = ':') const { + std::vector ranges; + get_cmd_line_arguments(arg_name, ranges, delim); + + for (std::vector::const_iterator range = ranges.begin(); + range != ranges.end(); ++range) { + + std::vector range_vals; + seperate_string(*range, range_vals, sep); + vals.push_back(range_vals); + } + } + /** * The number of pairs parsed */ @@ -249,6 +249,33 @@ struct CommandLine { tokens.push_back(tok->first); } } + + template + static void seperate_string(std::string const& str, + std::vector& vals, + char sep = ',') { + std::istringstream str_stream(str); + std::string::size_type old_pos = 0; + std::string::size_type new_pos = 0; + + // Iterate -delimited values + value_t val; + while ((new_pos = str.find(sep, old_pos)) != std::string::npos) { + if (new_pos != old_pos) { + str_stream.width(new_pos - old_pos); + str_stream >> val; + vals.push_back(val); + } + + // skip over delimiter + str_stream.ignore(1); + old_pos = new_pos + 1; + } + + // Read last value + str_stream >> val; + vals.push_back(val); + } }; } // namespace cutlass diff --git a/tools/util/device_memory.h b/tools/util/device_memory.h index c627c54cc..0aa0532cb 100644 --- a/tools/util/device_memory.h +++ b/tools/util/device_memory.h @@ -26,9 +26,9 @@ #include -#include -#include -#include +#include "cutlass/util/debug.h" +#include "cutlass/util/platform.h" +#include "tools/util/exceptions.h" namespace cutlass { namespace device_memory { @@ -124,6 +124,10 @@ struct allocation { } }; + // + // Data members + // + /// Number of elements of T allocated on the current CUDA device size_t capacity; @@ -131,7 +135,7 @@ struct allocation { 
platform::unique_ptr smart_ptr; // - // + // Methods // /// Constructor: allocates no memory @@ -140,6 +144,11 @@ struct allocation { /// Constructor: allocates \p capacity elements on the current CUDA device allocation(size_t _capacity) : smart_ptr(allocate(_capacity)), capacity(_capacity) {} + /// Copy constructor + allocation(allocation const &p): smart_ptr(allocate(p.capacity)), capacity(p.capacity) { + copy_device_to_device(smart_ptr.get(), p.get(), capacity); + } + /// Destructor ~allocation() { reset(); } @@ -172,6 +181,16 @@ struct allocation { /// Returns the deleter object which would be used for destruction of the managed object (const) const deleter& get_deleter() const { return smart_ptr.get_deleter(); } + + /// Copies a device-side memory allocation + allocation & operator=(allocation const &p) { + if (capacity != p.capacity) { + smart_ptr.reset(allocate(p.capacity)); + capacity = p.capacity; + } + copy_device_to_device(smart_ptr.get(), p.get(), capacity); + return *this; + } }; } // namespace device_memory diff --git a/tools/util/distribution.h b/tools/util/distribution.h new file mode 100644 index 000000000..1c2701fc3 --- /dev/null +++ b/tools/util/distribution.h @@ -0,0 +1,138 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#pragma once + +/*! \file + \brief This header contains a class to parametrize a statistical distribution function. 
+*/ + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Distribution type +struct Distribution { + /// Variant types + enum Kind { Invalid, Uniform, Gaussian, Linear, Identity }; + + /// Distribution state + union { + /// Uniform distribution + struct { + double min; + double max; + } uniform; + + /// Gaussian distribution + struct { + double mean; + double stddev; + } gaussian; + + /// Elements are linear combination of row and column index + struct { + double offset; + double delta_row; + double delta_column; + } linear; + }; + + /// Active variant kind + Kind kind; + + /// Random values are cast to integer after scaling by this power of two + int int_scale; + + // + // Methods + // + + Distribution() : kind(Invalid), int_scale(0) {} + + /// Configures distribution as uniform random + Distribution &set_uniform(double _min, double _max, int _int_scale = 0) { + kind = Uniform; + uniform.min = _min; + uniform.max = _max; + int_scale = _int_scale; + return *this; + } + + /// Configures distribution as Gaussian distribution + Distribution &set_gaussian(double _mean, double _stddev, int _int_scale = 0) { + kind = Gaussian; + gaussian.mean = _mean; + gaussian.stddev = _stddev; + int_scale = _int_scale; + return *this; + } + + /// Sets identity + Distribution &set_identity() { + kind = Identity; + return *this; + } + + /// Configures distribution as linear combination of row and column index + Distribution &set_linear(double _offset, double _delta_row, double _delta_column) { + kind = Linear; + linear.offset = _offset; + linear.delta_row = _delta_row; + linear.delta_column = _delta_column; + return *this; + } +}; + +} // namespace cutlass + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Prints a Distribution to ostream +inline std::ostream &operator<<(std::ostream &out, cutlass::Distribution const &dist) { + switch 
(dist.kind) { + case cutlass::Distribution::Uniform: + out << "uniform, min: " << dist.uniform.min << ", max: " << dist.uniform.max; + break; + case cutlass::Distribution::Gaussian: + out << "gaussian, mean: " << dist.gaussian.mean << ", stddev: " << dist.gaussian.stddev; + break; + case cutlass::Distribution::Linear: + out << "linear, mean: " << dist.linear.offset << ", delta_row: " << dist.linear.delta_row + << ", delta_column: " << dist.linear.delta_column; + break; + case cutlass::Distribution::Identity: + break; + default: + out << "unknown"; + } + + out << ", int_scale: " << dist.int_scale; + + return out; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/util/exceptions.h b/tools/util/exceptions.h index 72d99fec0..3683fbf4f 100644 --- a/tools/util/exceptions.h +++ b/tools/util/exceptions.h @@ -28,7 +28,7 @@ #include #include -#include +#include "cutlass/util/platform.h" namespace cutlass { diff --git a/tools/util/half.h b/tools/util/half.h index ee536e5b3..91e8b1130 100644 --- a/tools/util/half.h +++ b/tools/util/half.h @@ -107,6 +107,33 @@ class half_t { uint16_t& raw() { return x; } uint16_t raw() const { return x; } + // + // Stream interactions + // + + /// put to stream - half_t-precision types bitcast as unsigned shorts if base is hexadecimal + friend std::ostream& operator<<(std::ostream& out, cutlass::half_t const& h) { + if (out.flags() & std::ios::hex) { + return out << h.x; + } else { + return out << float(h); + } + } + + /// read from stream - half_t-precision types parsed as unsigned shorts if base is hexadecimal + friend std::istream& operator>>(std::istream& in, cutlass::half_t& h) { + if (in.flags() & std::ios::hex) { + unsigned short u = 0; + in >> u; + h = cutlass::half_t::bitcast(u); + } else { + float f = 0; + in >> f; + h = cutlass::half_t(f); + } + return in; + } + public: /// data unsigned short x; @@ -167,9 +194,6 @@ cutlass::half_t operator-(float, 
cutlass::half_t const&); cutlass::half_t operator*(float, cutlass::half_t const&); cutlass::half_t operator/(float, cutlass::half_t const&); -std::ostream& operator<<(std::ostream&, cutlass::half_t const&); /// writes a half_t -std::istream& operator>>(std::istream&, cutlass::half_t&); /// reads a half_t - #ifdef BOOST_LEXICAL_CAST_INCLUDED namespace boost { @@ -714,30 +738,3 @@ inline cutlass::half_t sqrt(cutlass::half_t const& h) { return cutlass::half_t(std::sqrt(float(h))); } } // namespace std - -// -// Stream interactions -// - -/// put to stream - half_t-precision types bitcast as unsigned shorts if base is hexadecimal -inline std::ostream& operator<<(std::ostream& out, cutlass::half_t const& h) { - if (out.flags() & std::ios::hex) { - return out << h.x; - } else { - return out << float(h); - } -} - -/// read from stream - half_t-precision types parsed as unsigned shorts if base is hexadecimal -inline std::istream& operator>>(std::istream& in, cutlass::half_t& h) { - if (in.flags() & std::ios::hex) { - unsigned short u = 0; - in >> u; - h = cutlass::half_t::bitcast(u); - } else { - float f = 0; - in >> f; - h = cutlass::half_t(f); - } - return in; -} diff --git a/tools/util/host_matrix.h b/tools/util/host_matrix.h new file mode 100644 index 000000000..9812f757d --- /dev/null +++ b/tools/util/host_matrix.h @@ -0,0 +1,264 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#pragma once + +/*! \file + \brief HostMatrix is a helper to define a HostTensor of rank=2 with a contiguous layout. + + See tools/util/host_tensor.h for more details. 
+*/ + +#include "cutlass/matrix_traits.h" +#include "tools/util/host_tensor.h" + +#include "tools/util/reference/host/gemm.h" + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to define a rank=2 host matrix with contiguous layout +template < + typename T +> +class HostMatrix : + public HostTensor { +public: + + /// Base class is a HostTensor of rank=2 with contiguous layout + typedef HostTensor Base; + + /// Index type + typedef typename Base::Index Index; + +private: + + /// Layout of contiguous matrix + MatrixLayout::Kind layout_; + +public: + + /// Default ctor + HostMatrix(): layout_(MatrixLayout::kColumnMajor) { } + + /// Constructs a HostTensor from size. Assumes column-major and infers leading dimension + HostMatrix(MatrixCoord const& size, bool _device_backed = true): layout_(MatrixLayout::kColumnMajor) { + Index ldm = size[0]; + this->reset(MatrixLayout::ContiguousLayout::stride(layout_, ldm), size, _device_backed); + } + + /// Constructs a HostTensor from size and layout - infers leading dimension + HostMatrix(MatrixCoord const& size, MatrixLayout::Kind layout, bool _device_backed = true): layout_(layout) { + Index ldm = (layout_ == MatrixLayout::kColumnMajor ? size[0] : size[1]); + this->reset(MatrixLayout::ContiguousLayout::stride(layout_, ldm), size, _device_backed); + } + + /// Constructs a HostTensor given size, layout, and leading dimension + HostMatrix(MatrixCoord const& size, Index ldm, MatrixLayout::Kind layout, bool _device_backed = true): layout_(layout) { + this->reset(MatrixLayout::ContiguousLayout::stride(layout_, ldm), size, _device_backed); + } + + /// Returns contiguous matrix layout kind + MatrixLayout::Kind get_layout() const { + return layout_; + } + + /// Resizes a matrix + void resize(MatrixCoord const &_size, MatrixLayout::Kind layout, Index ldm = 0, bool _device_backed = true) { + if (!ldm) { + ldm = (layout == MatrixLayout::kColumnMajor ? 
_size[0] : _size[1]); + } + layout_ = layout; + this->reset(MatrixLayout::ContiguousLayout::stride(layout_, ldm), _size, _device_backed); + } + + /// Helper to resize matrix + void resize(Index rows, Index columns, MatrixLayout::Kind layout, Index ldm = 0, bool _device_backed = true) { + this->resize(MatrixCoord(rows, columns), layout, ldm,_device_backed); + } + + /// Helper to resize matrix + void resize_matrix(Index rows, Index columns, MatrixLayout::Kind layout, Index ldm = 0, bool _device_backed = true) { + this->resize(MatrixCoord(rows, columns), layout, ldm,_device_backed); + } + + /// Gets the leading dimension of the matrix + Index leading_dim() const { + if (layout_ == MatrixLayout::kColumnMajor) { + return this->stride(MatrixLayout::ContiguousLayout::kColumn); + } + else { + return this->stride(MatrixLayout::ContiguousLayout::kRow); + } + } + + /// Returns size as a MatrixCoord + MatrixCoord size() const { + return MatrixCoord(Base::size()); + } + + /// Returns size in the given dimension + Index size(int idx) const { + return Base::size(idx); + } + + /// Helper to call GEMM operation on HostMatrix objects that differ only in their scalar type. 
+ template + void gemm( + HostMatrix const& tensor_a, + HostMatrix const& tensor_b, + Stype alpha = Stype(1), + Stype beta = Stype(0)) { + + gemm::GemmCoord problem_size( + tensor_a.size().column(), + this->size().column(), + this->size().row(), + 1); + + cutlass::reference::host::Gemm( + problem_size, + alpha, + tensor_a, + tensor_b, + beta, + *this, + Ctype(0)); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to define a rank=2 host matrix with column-major layout +template < + typename T +> +class HostMatrixColumnMajor : + public HostTensor { +public: + + /// Base class is a HostTensor of rank=2 with contiguous layout + typedef HostTensor Base; + + /// Tensor coordinate + typedef typename Base::TensorCoord TensorCoord; + + /// Index type + typedef typename Base::Index Index; + +public: + + /// Default ctor + HostMatrixColumnMajor() { } + + /// Constructs a HostMatrixColumnMajor from size. Assumes column-major and infers leading dimension + HostMatrixColumnMajor(TensorCoord const& size, bool _device_backed = true): Base(size, size[0], _device_backed) { + + } + + /// Constructs a HostMatrixColumnMajor given size, layout, and leading dimension + HostMatrixColumnMajor(TensorCoord const& size, Index ldm, bool _device_backed = true) { + this->reset(make_Coord(ldm, 1), size, _device_backed); + } + + /// Resizes a matrix + void resize(MatrixCoord const &size, int ldm = 0, bool _device_backed = true) { + this->reset(ldm, size, _device_backed); + } + + /// Returns contiguous matrix layout kind + MatrixLayout::Kind get_layout() const { + return MatrixLayout::kColumnMajor; + } + + /// Gets the leading dimension of the matrix + Index leading_dim() const { + return this->stride(0); + } + + /// Returns size as a MatrixCoord + MatrixCoord size() const { + return MatrixCoord(Base::size()); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + 
+/// Helper to define a rank=2 host matrix with row-major layout +template < + typename T +> +class HostMatrixRowMajor : + public HostTensor { +public: + + /// Base class is a HostTensor of rank=2 with contiguous layout + typedef HostTensor Base; + + /// Tensor coordinate + typedef typename Base::TensorCoord TensorCoord; + + /// Index type + typedef typename Base::Index Index; + +public: + + /// Default ctor + HostMatrixRowMajor() { } + + /// Constructs a HostTensor from size. Assumes column-major and infers leading dimension + HostMatrixRowMajor(TensorCoord const& size, bool _device_backed = true) { + this->reset(make_Coord(size[1], 1), size, _device_backed); + } + + /// Constructs a HostTensor given size, layout, and leading dimension + HostMatrixRowMajor(TensorCoord const& size, Index ldm, bool _device_backed = true) { + this->reset(make_Coord(ldm, 1), size, _device_backed); + } + + /// Resizes a matrix + void resize(MatrixCoord const &size, int ldm = 0, bool _device_backed = true) { + this->reset(ldm, size, _device_backed); + } + + /// Returns contiguous matrix layout kind + MatrixLayout::Kind get_layout() const { + return MatrixLayout::kRowMajor; + } + + /// Gets the leading dimension of the matrix + Index leading_dim() const { + return this->stride(0); + } + + /// Returns size as a MatrixCoord + MatrixCoord size() const { + return MatrixCoord(Base::size()); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/tools/util/host_matrix_view.h b/tools/util/host_matrix_view.h new file mode 100644 index 000000000..84767878c --- /dev/null +++ b/tools/util/host_matrix_view.h @@ -0,0 +1,205 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#pragma once + +/*! \file + \brief HostMatrix is a helper to define a HostTensor of rank=2 with a contiguous layout. + + See tools/util/host_tensor.h for more details. 
+*/ + +#include "cutlass/matrix_traits.h" +#include "tools/util/host_tensor.h" + +#include "tools/util/reference/host/gemm.h" + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to define a rank=2 host matrix with contiguous layout +template < + typename T +> +class HostMatrixView : + public HostTensorView { +public: + + /// Base class is a HostTensor of rank=2 with contiguous layout + typedef HostTensorView Base; + + /// Tensor coordinate + typedef typename Base::TensorCoord TensorCoord; + + /// Index type + typedef typename Base::Index Index; + +private: + + /// Layout of contiguous matrix + MatrixLayout::Kind layout_; + +public: + + /// Default ctor + HostMatrixView(): layout_(MatrixLayout::kColumnMajor) { } + + /// Constructs a HostTensor from size. Assumes column-major and infers leading dimension + HostMatrixView(TensorCoord const& size): layout_(MatrixLayout::kColumnMajor) { + Index ldm = size[0]; + this->reset(MatrixLayout::ContiguousLayout::stride(layout_, ldm), size); + } + + /// Constructs a HostTensor from size and layout - infers leading dimension + HostMatrixView(TensorCoord const& size, MatrixLayout::Kind layout): layout_(layout) { + Index ldm = (layout_ == MatrixLayout::kColumnMajor ? 
size[0] : size[1]); + this->reset(MatrixLayout::ContiguousLayout::stride(layout_, ldm), size); + } + + /// Constructs a HostTensor given size, layout, and leading dimension + HostMatrixView(TensorCoord const& size, Index ldm, MatrixLayout::Kind layout): layout_(layout) { + this->reset(MatrixLayout::ContiguousLayout::stride(layout_, ldm), size); + } + + /// Gets the leading dimension of the matrix + Index leading_dim() const { + if (layout_ == MatrixLayout::kColumnMajor) { + return this->stride(MatrixLayout::ContiguousLayout::kColumn); + } + else { + return this->stride(MatrixLayout::ContiguousLayout::kRow); + } + } + + /// Returns contiguous matrix layout kind + MatrixLayout::Kind get_layout() const { + return layout_; + } + + /// Returns size as a MatrixCoord + MatrixCoord size() const { + return MatrixCoord(Base::size()); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to define a rank=2 host matrix with column-major layout +template +class HostMatrixViewColumnMajor : + public HostTensorView { +public: + + /// Base class is a HostTensorView of rank=2 with contiguous layout + typedef HostTensorView Base; + + /// Tensor coordinate + typedef typename Base::TensorCoord TensorCoord; + + /// Index type + typedef typename Base::Index Index; + +public: + + /// Default ctor + HostMatrixViewColumnMajor() { } + + /// Constructs a HostMatrixViewColumnMajor from size. 
Assumes column-major and infers leading dimension + HostMatrixViewColumnMajor(TensorCoord const& size): Base(size, size[0]) { + + } + + /// Constructs a HostMatrixViewColumnMajor given size, layout, and leading dimension + HostMatrixViewColumnMajor(TensorCoord const& size, Index ldm) { + this->reset(make_Coord(ldm, 1), size); + } + + /// Returns contiguous matrix layout kind + MatrixLayout::Kind get_layout() const { + return MatrixLayout::kColumnMajor; + } + + /// Gets the leading dimension of the matrix + Index leading_dim() const { + return this->stride(0); + } + + /// Returns size as a MatrixCoord + MatrixCoord size() const { + return MatrixCoord(Base::size()); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to define a rank=2 host matrix with row-major layout +template +class HostMatrixViewRowMajor : + public HostTensorView { +public: + + /// Base class is a HostTensor of rank=2 with contiguous layout + typedef HostTensorView Base; + + /// Tensor coordinate + typedef typename Base::TensorCoord TensorCoord; + + /// Index type + typedef typename Base::Index Index; + +public: + + /// Default ctor + HostMatrixViewRowMajor() { } + + /// Constructs a HostMatrixViewRowMajor from size. 
Assumes column-major and infers leading dimension + HostMatrixViewRowMajor(TensorCoord const& size): Base(size, size[1]) { + + } + + /// Constructs a HostMatrixViewRowMajor given size, layout, and leading dimension + HostMatrixViewRowMajor(TensorCoord const& size, Index ldm) { + this->reset(make_Coord(ldm, 1), size); + } + + /// Returns contiguous matrix layout kind + MatrixLayout::Kind get_layout() const { + return MatrixLayout::kRowMajor; + } + + /// Gets the leading dimension of the matrix + Index leading_dim() const { + return this->stride(0); + } + + /// Returns size as a MatrixCoord + MatrixCoord size() const { + return MatrixCoord(Base::size()); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/tools/util/host_tensor.h b/tools/util/host_tensor.h index cc9963c22..fc042b0b7 100644 --- a/tools/util/host_tensor.h +++ b/tools/util/host_tensor.h @@ -25,51 +25,126 @@ #pragma once /*! \file - \brief Template class to perform computations on tensors and manage memory. + \brief HostTensor contributes management for both host and device memory. + + HostTensor allocates host and device memory upon construction. Basic element-wise operations on + host memory synchronize device memory automatically. Explicit copy operations provide abstractions + for CUDA memcpy operations. + + Call device_{data, ref, view} for accessing device memory allocations. + + See cutlass/tensor_ref.h, cutlass/tensor_view.h, and tools/util/host_tensor_view.h for more details. 
*/ -#include -#include -#include -#include -#include +#include "cutlass/cutlass.h" +#include "cutlass/matrix_traits.h" +#include "cutlass/tensor_ref.h" +#include "tools/util/device_memory.h" +#include "tools/util/host_tensor_view.h" +#include "tools/util/type_traits.h" #include namespace cutlass { -template -class HostTensor : public HostTensorView { +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Host tensor +template < + /// Scalar data type (may be mapped to compatible types for use on host and device) + typename T, + /// Rank of logical tensor + int Rank_ = 4, + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc_ = IdentityTensorMapFunc, + /// Rank of internal n-D array + int StorageRank_ = MapFunc_::kStorageRank, + /// Index type used for coordinates + typename Index_ = int, + /// Index type used for offsets and pointer differences + typename LongIndex_ = long long +> +class HostTensor : public HostTensorView< + typename TypeTraits::host_type, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> { public: + /// Type used for host-side allocations + typedef typename TypeTraits::host_type HostType; + /// Type used for device-side allocations typedef typename TypeTraits::device_type DeviceType; /// Base class - typedef HostTensorView Base; - - /// If true, allocates device side memory - static bool const DeviceBacked = DeviceBacked_; - - /// Rank of tensor - static int const Rank = Base::Rank; + typedef HostTensorView< + typename TypeTraits::host_type, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> Base; /// Type used to compute the offset of an element to the base of a tensor - typedef typename Base::Offset_t Offset_t; - - /// Tensor reference to host memory - typedef typename Base::TensorRef_t TensorRef_t; + typedef LongIndex_ LongIndex; /// Tensor reference to device memory - typedef TensorRef DeviceTensorRef; + typedef typename 
cutlass::TensorRef< + DeviceType, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> DeviceTensorRef; /// Tensor reference to constant device memory - typedef TensorRef ConstDeviceTensorRef; + typedef typename DeviceTensorRef::ConstTensorRef ConstDeviceTensorRef; - /// Coordinate into tensor - typedef typename Base::Coord_t Coord_t; + /// TensorView to device memory + typedef TensorView< + DeviceType, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> DeviceTensorView; + + /// Tensor reference to constant device memory + typedef typename DeviceTensorView::ConstTensorView ConstDeviceTensorView; + + /// Tensor reference to host memory + typedef typename Base::TensorRef TensorRef; + + /// Tensor view to host memory + typedef TensorView< + typename TypeTraits::host_type, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> HostTensorView; + + /// Tensor view to host memory + typedef typename HostTensorView::ConstTensorView ConstHostTensorView; + + /// Coordinate in logical tensor space + typedef typename TensorRef::TensorCoord TensorCoord; + + /// Coordinate in storage n-D array + typedef typename TensorRef::StorageCoord StorageCoord; + + /// Stride vector in storage coordinate space + /// Least significant stride is = 1 and not stored + typedef typename TensorRef::StrideVector StrideVector; + + /// Rank of internal storage. 
+ static int const kStorageRank = Base::kStorageRank; private: + /// Host-side memory allocation - std::vector host_; + std::vector host_; /// Device-side memory cutlass::device_memory::allocation device_; @@ -82,232 +157,173 @@ class HostTensor : public HostTensorView { /// Default constructor HostTensor() {} - /// Constructs a Tensor_view from stride and size - HostTensor(Coord_t const& _stride, Coord_t const& _size) { reset(_stride, _size); } - - /// Constructs a HostTensor from size - infers strides - HostTensor(Coord_t const& _size) { - Coord_t _stride = make_Coord( - _size.at(2) * _size.at(1) * _size.at(0), _size.at(1) * _size.at(0), _size.at(0), 1); - reset(_stride, _size); + /// Constructor for resizing the least significant rank + HostTensor(Index_ size_1D, bool device_backed = true) { + this->resize(size_1D, device_backed); } - /// Returns the number of elements needed to back vector - size_t capacity() { return Base::capacity(); } + /// Helper to construct from pointer, stride, and size + HostTensor( + StorageCoord const &_stride, + TensorCoord const& _size, + bool _device_backed = true + ) { - /// Returns true if the Tensor_view is bound to some memory - bool good() const { return Base::good(); } + this->reset(_stride, _size); + } + + /// Clears the HostTensor allocation to size/capacity = 0 + void reset() { + host_.clear(); + device_.reset(); + Base::reset(); + } + + /// Helper to resize the least significant rank + void resize( + Index_ size_1D, + bool _device_backed = true) { + + TensorCoord _size; + _size[Base::kRank - 1] = size_1D; + for (int i = 0; i < Base::kRank - 1; ++i) { + _size[i] = 1; + } + StorageCoord _stride; + _stride[Base::kStorageRank - 1] = 1; + for (int i = 0; i < Base::kStorageRank - 1; ++i) { + _stride[i] = size_1D; + } + this->reset(_stride, _size, _device_backed); + } /// Updates the reference and size of a Tensor_view object - void reset(Coord_t const& _stride, Coord_t const& _size) { - size_t _capacity = _size.at(0) * 
_stride.at(0); + void reset( + StorageCoord const& stride, + TensorCoord const& size, + bool _device_backed = true) { + // Construct a temporary TensorView so we can calculate the new capacity + size_t _capacity = Base(nullptr, stride, size).capacity(); + + // Allocate memory DeviceType* _device_memory = nullptr; - if (DeviceBacked) { + if (_device_backed) { _device_memory = cutlass::device_memory::allocate(_capacity); } host_.clear(); host_.resize(_capacity); - for (size_t i = 0; i < _capacity; ++i) { - host_[i] = T((int)0xdeadbeef); - } device_.reset(_device_memory, _capacity); - Base::reset(TensorRef_t(host_.data(), _stride), _size); + Base::reset(TensorRef(host_.data(), stride), size); } - /// Initializes the host tensor as a matrix - void resize_matrix(int rows, int columns, MatrixLayout::Kind layout) { - bool col_major = (layout == MatrixLayout::kColumnMajor); - int ldm = (col_major ? rows : columns); + /// Accesses the tensor reference pointing to data + TensorRef host_ref() { return Base::ref(); } - Coord_t stride = make_Coord(rows * columns, col_major ? 1 : ldm, col_major ? 
ldm : 1, 1); + /// Accesses the tensor reference pointing to data + TensorRef host_ref() const { return Base::ref(); } - Coord_t size = make_Coord(1, rows, columns, 1); - - reset(stride, size); + /// Accesses the tensor reference pointing to data + DeviceTensorRef device_ref() const { + return DeviceTensorRef(device_data(), this->stride()); } - /// Simplifies resizing the host tensor - void resize(int elements) { resize_matrix(1, elements, MatrixLayout::kColumnMajor); } + /// Accesses the tensor reference pointing to data + HostTensorView host_view() { + return HostTensorView(host_data(), this->stride(), this->size()); + } + + /// Accesses the tensor reference pointing to data + ConstHostTensorView host_view() const { + return HostTensorView(host_data(), this->stride(), this->size()); + } + + /// Accesses the tensor reference pointing to data + DeviceTensorView device_view() const { + return DeviceTensorView(device_data(), this->stride(), this->size()); + } /// Gets pointer to host data - T const* host_data() const { return &host_[0]; } - - /// Gets pointer to host data - T* host_data() { return &host_[0]; } + HostType * host_data() { return host_.data(); } /// Gets pointer to device data - DeviceType* device_data() const { return device_.get(); } + DeviceType* device_data() { return device_.get(); } + + /// Gets pointer to host data + HostType const * host_data() const { return host_.data(); } + + /// Gets pointer to device data + DeviceType * device_data() const { return device_.get(); } + + /// Returns true if device memory is allocated + bool device_backed() const { + return device_.get(); + } /// Copies data from device to host void sync_host() { - if (DeviceBacked) { + if (device_.get()) { device_memory::copy_to_host( - host_.data(), reinterpret_cast(device_.get()), host_.size()); + host_.data(), reinterpret_cast(device_.get()), host_.size()); } } /// Copies data from host to device void sync_device() { - if (DeviceBacked) { + if (device_.get()) { 
device_memory::copy_to_device( - device_.get(), reinterpret_cast(host_.data()), host_.size()); + device_.get(), + reinterpret_cast(host_.data()), + host_.size()); } } - /// Copy data from a caller-supplied device pointer - void copy_to_host(DeviceType const *ptr_device) { + /// Copy data from a caller-supplied device pointer into host memory + void copy_to_host(DeviceType const* ptr_device) { device_memory::copy_to_host( - host_.data(), reinterpret_cast(ptr_device), host_.size()); + host_.data(), reinterpret_cast(ptr_device), host_.size()); } - /// Copies data to a caller-supplied device pointer - void copy_to_device(DeviceType *ptr_device) { + /// Copies data from host memory to a caller-supplied device pointer + void copy_to_device(DeviceType* ptr_device) { device_memory::copy_to_device( - ptr_device, reinterpret_cast(host_.data()), host_.size()); - } - - /// Accesses the tensor reference pointing to data - TensorRef_t& host_ref() { return Base::ref(); } - - /// Accesses the tensor reference pointing to data - TensorRef_t const& host_ref() const { return Base::ref(); } - - /// Accesses the tensor reference pointing to data - DeviceTensorRef device_ref() const { return DeviceTensorRef(device_data(), stride()); } - - /// Returns a tensor ref to constant memory on the device - ConstDeviceTensorRef const_device_ref() const { - return ConstDeviceTensorRef(device_data(), stride()); - } - - /// Accesses the size - Coord_t const& size() const { return Base::size(); } - - /// Accesses the size - int size(int dim) const { return Base::size(dim); } - - /// Accesses the size - Coord_t const& stride() const { return Base::stride(); } - - /// Accesses the size - int stride(int dim) const { return Base::stride(dim); } - - /// Returns the index of an element - Offset_t offset(Coord_t const& coord) const { return Base::offset(coord); } - - /// Determines whether a location is within a tensor - bool contains(Coord_t const& coord) const { return Base::contains(coord); } - - /// Element-wise accessor - T& at(Coord_t const& 
coord) const { return Base::at(coord); } - - /// Element-wise accessor - T& operator[](Coord_t const& coord) { return at(coord); } - - /// Element-wise accessor with basic offset - T& at(int idx) const { return Base::at(idx); } - - /// Returns a Tensor_view given location and size quantities - TensorView subview(Coord_t const& _location, Coord_t _size) const { - return Base::subview(_location, _size); - } - - /// Recurses through all dimensions and applies a unary operation - template - void elementwise_in_place(F& op, int dim = 0, Offset_t dst_offset_base = 0) { - Base::elementwise_in_place(op, dim, dst_offset_base); - } - - /// Recurses through all dimensions and applies a unary operator, supplying the logical - /// coordinate within the tensor as an argument - template - void elementwise_stream(F& op, int dim = 0, Offset_t dst_offset_base = 0) { - Base::elementwise_stream(op, dim, dst_offset_base); - } - - /// Recurses through all dimensions and applies a unary operator, supplying the logical - /// coordinate within the tensor as an argument - template - void elementwise_generate(F& op, - int dim = 0, - Offset_t dst_offset_base = 0, - Coord_t coord = Coord_t(0)) { - Base::elementwise_generate(op, dim, dst_offset_base, coord); - } - - /// Recurses through all dimensions and applies a binary operation - template - bool elementwise_in_place(F& op, - int dim, - TensorView const& tensor, - Offset_t dst_offset_base = 0, - Offset_t src_offset_base = 0) { - return Base::elementwise_in_place(op, dim, tensor, dst_offset_base, src_offset_base); + ptr_device, reinterpret_cast(host_.data()), host_.size()); } /// Accumulate in place - template - TensorView& operator+=(TensorView const& tensor) { + template + HostTensor& operator+=(SrcTensorView const& tensor) { Base::operator+=(tensor); sync_device(); return *this; } /// Subtract in place - template - TensorView& operator-=(TensorView const& tensor) { + template + HostTensor& operator-=(SrcTensorView const& tensor) { 
Base::operator-=(tensor); sync_device(); return *this; } /// Multiply in place - template - TensorView& operator*=(TensorView const& tensor) { + template + HostTensor& operator*=(SrcTensorView const& tensor) { Base::operator*=(tensor); sync_device(); return *this; } /// Divide in place - template - TensorView& operator/=(TensorView const& tensor) { + template + HostTensor& operator/=(SrcTensorView const& tensor) { Base::operator/=(tensor); sync_device(); return *this; } - /// equality with epsilon tolerance - bool equals(TensorView const& tensor, T epsilon) const { - return Base::equals(tensor, epsilon); - } - - /// equality with ulps tolerance - bool bit_equals(TensorView const& tensor, long long ulps_threshold = 0) { - return Base::bit_equals(tensor, ulps_threshold); - } - - /// Computes general matrix product among select dimensions of a tensor - /// Assumes: - /// D: number of independent GEMMs to compute - /// H: height of matrix - /// W: width of matrix - template < - /// Data type of A matrix elements - typename A, - /// Data type of B matrix elements - typename B, - /// Data type of "compute" type (i.e. 
accumulator) - typename Ctype, - /// Data type of scale factors - typename Stype> - void gemm(TensorView const& tensor_a, TensorView const& tensor_b, Stype alpha, Stype beta) { - Base::template gemm(tensor_a, tensor_b, alpha, beta); - } - /// Fills with random data template void fill_random(Gen generator) { @@ -335,31 +351,38 @@ class HostTensor : public HostTensorView { } /// computes elements as a linear combination of their coordinates - void fill_linear(Coord_t v, T offset = T(0)) { + void fill_linear(TensorCoord v, HostType offset = HostType(0)) { Base::fill_linear(v, offset); sync_device(); } /// computes elements as a linear combination of their coordinates - void fill_sequential(T v = T(1), T offset = T(0)) { + void fill_sequential(HostType v = HostType(1), HostType offset = HostType(0)) { Base::fill_sequential(v, offset); sync_device(); } /// fills with a value - void fill(T val = T(0)) { + void fill(HostType val = HostType(0)) { Base::fill(val); sync_device(); } - /// Copies from external data source and performs type conversion - template - void fill(TensorView const& tensor) { + /// copies from external data source and performs type conversion + template < + typename SrcType, + typename SrcMapFunc_, + int SrcStorageRank_, + typename SrcIndex_, + typename SrcLongIndex_ + > + void fill( + TensorView const& tensor) { Base::fill(tensor); sync_device(); } - - /// Computes the norm of the matrix in double-precision - double norm() const { return Base::norm(); } }; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + } // namespace cutlass diff --git a/tools/util/host_tensor_view.h b/tools/util/host_tensor_view.h index de3227f24..4b7f90c74 100644 --- a/tools/util/host_tensor_view.h +++ b/tools/util/host_tensor_view.h @@ -23,45 +23,77 @@ * **************************************************************************************************/ /*! 
\file - \brief Host-side implementation of useful operations + \brief Host-side implementation of basic tensor operations. + + See cutlass/tensor_ref.h and cutlass/tensor_view.h for more details. */ #pragma once -#include -#include -#include +#include "cutlass/cutlass.h" +#include "cutlass/tensor_view.h" +#include "tools/util/type_traits.h" namespace cutlass { //////////////////////////////////////////////////////////////////////////////////////////////////// -template -struct Cast { - static inline DstType apply(SrcType src) { return static_cast(src); }; -}; - -template <> -struct Cast { - static inline int8_t apply(float src) { - return static_cast(fmaxf(-128.f, fminf(127.f, src))); - }; -}; - -template <> -struct Cast { - static inline uint8_t apply(float src) { - return static_cast(fmaxf(0.f, fminf(255.f, src))); - }; -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -class HostTensorView : public TensorView { +template < + /// Data type of element stored within tensor + typename Storage_, + /// Rank of logical tensor + int Rank_ = 4, + /// Maps a Coord in the logical tensor index space to the internal n-D array + typename MapFunc_ = IdentityTensorMapFunc, + /// Rank of internal n-D array + int StorageRank_ = Rank_, + /// Index type used for coordinates + typename Index_ = int, + /// Index type used for offsets and pointer differences + typename LongIndex_ = long long +> +class HostTensorView : + public TensorView { public: /// Base class - typedef TensorView TensorView_t; + typedef TensorView Base; + + /// Storage type + typedef typename Base::Storage Storage; + + /// Alias for underlying TensorRef + typedef typename Base::TensorRef TensorRef; + + /// Index type + typedef typename Base::Index Index; + + /// Coordinate in logical tensor space + typedef typename TensorRef::TensorCoord TensorCoord; + + /// Coordinate in storage n-D array + typedef typename TensorRef::StorageCoord StorageCoord; 
+ + /// Stride vector in storage coordinate space + /// Least significant stride is = 1 and not stored + typedef typename TensorRef::StrideVector StrideVector; + + /// Long index type for pointer offsets + typedef typename Base::LongIndex LongIndex; + + /// Rank of tensor index space + static int const kRank = Base::kRank; + + // + // Definitions included for backwards compatibility - These will be removed + // in the next major release. + // + + /// Base class + typedef Base TensorView_t; + + // + // These definitions are meaningful for rank=4 tensors. + // /// Convention: depth is the first dimension static int const Dim_D = 0; @@ -75,19 +107,8 @@ class HostTensorView : public TensorView { /// Convention: channel is the second dimension static int const Dim_C = 3; - /// Rank of tensor - static int const Rank = TensorView_t::Rank; - - /// Type used to compute the offset of an element to the base of a tensor - typedef typename TensorView_t::Offset_t Offset_t; - - /// Reference and stride - typedef typename TensorView_t::TensorRef_t TensorRef_t; - - /// Coordinate into tensor - typedef typename TensorView_t::Coord_t Coord_t; - public: + // // Device and Host Methods // @@ -95,91 +116,87 @@ class HostTensorView : public TensorView { /// Default constructor HostTensorView() {} - /// Constructs a Tensor_view from a TensorRef and size - HostTensorView(TensorRef_t const& _ref, Coord_t const& _size) : TensorView_t(_ref, _size) {} + /// Helper to construct from pointer, stride, and size + HostTensorView( + Storage_ *_ptr, + StrideVector const &_stride, + TensorCoord const& _size + ) : Base(TensorRef(_ptr, _stride), _size) {} - /// Accesses the size - Coord_t const& size() const { return TensorView_t::size(); } + /// Helper to construct from pointer, stride, and size + HostTensorView( + Storage_ *_ptr, + StorageCoord const &_stride, + TensorCoord const& _size + ) : Base(TensorRef(_ptr, _stride), _size) {} - /// Accesses the size of a specified dimension - int size(int dim) 
const { return size().at(dim); } - - /// Accesses the stride - Coord_t const& stride() const { return TensorView_t::stride(); } - - /// Accesses the stride along a specified dimension - int stride(int dim) const { return stride().at(dim); } - - /// Returns the number of scalar elements needed to store tensor - size_t capacity() const { return size(3) * stride(3) * stride(2) * stride(1) * stride(0); } - - /// Returns true if the Tensor_view is bound to some memory - bool good() const { return TensorView_t::good(); } - - /// Updates the reference and size of a TensorView object - void reset(TensorRef_t const& _ref = TensorRef_t(0), Coord_t const& _size = Coord_t()) { - return TensorView_t::reset(_ref, _size); - } - - /// Accesses the tensor reference pointing to data - TensorRef_t& ref() { return TensorView_t::ref(); } - - /// Accesses the tensor reference pointing to data - TensorRef_t const& ref() const { return TensorView_t::ref(); } + /// Constructs a Tensor_view from a TensorRef and size assuming dense packing + HostTensorView( + TensorRef const& _ref, + TensorCoord const& _size) : Base(_ref, _size) {} /// Assigns a tensor view - HostTensorView& operator=(TensorView_t const& _tensor) { - reset(_tensor.ref(), _tensor.size()); + HostTensorView& operator=(Base const& _tensor) { + this->reset(_tensor.ref(), _tensor.size()); return *this; } - /// Returns the index of an element - Offset_t offset(Coord_t const& coord) const { return TensorView_t::offset(coord); } + /// Returns a TensorView offset by a given amount + CUTLASS_HOST_DEVICE + HostTensorView operator+(TensorCoord const& b) const { + HostTensorView result(*this); + result.add_pointer_offset(this->offset(b)); + return result; + } - /// Determines whether a location is within a tensor - bool contains(Coord_t const& coord) const { return TensorView_t::contains(coord); } + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + HostTensorView& operator+=(TensorCoord const& b) { + 
this->add_pointer_offset(this->offset(b)); + return *this; + } - /// Element-wise accessor - T& at(Coord_t const& coord) const { return TensorView_t::at(coord); } + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + HostTensorView operator-(TensorCoord const& b) const { + TensorRef result(*this); + result.add_pointer_offset(-this->offset(b)); + return result; + } - /// Element-wise accessor - T& operator[](Coord_t const& coord) const { return at(coord); } - - /// Accesses an element with a raw offset - T& at(int idx) const { return TensorView_t::at(idx); } - - /// Accesses an element with a raw offset - T& operator[](int idx) const { return at(idx); } - - /// Returns a Tensor_view given location and size quantities - TensorView_t subview(Coord_t const& location, Coord_t size) const { - return TensorView_t::subview(location, size); + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + HostTensorView& operator-=(TensorCoord const& b) { + this->add_pointer_offset(-this->offset(b)); + return *this; } /// Recurses through all dimensions and applies a unary operation in place template - void elementwise_in_place(F& op, int dim = 0, Offset_t dst_offset_base = 0) { - Offset_t dst_offset = dst_offset_base; + void elementwise_in_place(F& op, int dim = 0, TensorCoord const &start_coord = TensorCoord()) { - for (int idx = 0; idx < size(dim); ++idx, dst_offset += stride(dim)) { - if (dim < Rank - 1) { - elementwise_in_place(op, dim + 1, dst_offset); + TensorCoord coord(start_coord); + for (int idx = 0; idx < this->size(dim); ++idx) { + coord[dim] = idx; + if (dim < kRank - 1) { + elementwise_in_place(op, dim + 1, coord); } else { - op(ref().data()[dst_offset]); + op(this->at(coord)); } } } /// Recurses through all dimensions and applies a unary operator with no arguments template - void elementwise_stream(F& op, int dim = 0, Offset_t dst_offset_base = 0) { - Offset_t dst_offset = dst_offset_base; + void elementwise_stream(F& op, int 
dim = 0, TensorCoord const &start_coord = TensorCoord()) { - for (int idx = 0; idx < size(dim); ++idx, dst_offset += stride(dim)) { - if (dim < Rank - 1) { - elementwise_stream(op, dim + 1, dst_offset); + TensorCoord coord(start_coord); + for (int idx = 0; idx < this->size(dim); ++idx) { + coord[dim] = idx; + if (dim < kRank - 1) { + elementwise_stream(op, dim + 1, coord); } else { - ref().data()[dst_offset] = op(); + this->at(coord) = op(); } } } @@ -189,61 +206,56 @@ class HostTensorView : public TensorView { template void elementwise_generate(F& op, int dim = 0, - Offset_t dst_offset_base = 0, - Coord_t coord = Coord_t(0)) { - Offset_t dst_offset = dst_offset_base; + TensorCoord const & start_coord = TensorCoord()) { - for (int idx = 0; idx < size(dim); ++idx, dst_offset += stride(dim)) { - coord.at(dim) = idx; - - if (dim < Rank - 1) { - elementwise_generate(op, dim + 1, dst_offset, coord); + TensorCoord coord(start_coord); + for (int idx = 0; idx < this->size(dim); ++idx) { + coord[dim] = idx; + if (dim < kRank - 1) { + elementwise_generate(op, dim + 1, coord); } else { - ref().data()[dst_offset] = op(coord); + this->at(coord) = op(coord); } } } /// Recurses through all dimensions and applies a unary operator, supplying the logical - /// coordinate within the tensor as an argument + /// coordinate within the tensor as an argument. Mutable. 
template void elementwise_visit(F& op, int dim = 0, - Offset_t dst_offset_base = 0, - Coord_t coord = Coord_t(0)) const { - Offset_t dst_offset = dst_offset_base; + TensorCoord const & start_coord = TensorCoord()) const { - for (int idx = 0; idx < size(dim); ++idx, dst_offset += stride(dim)) { - coord.at(dim) = idx; + TensorCoord coord(start_coord); + for (int idx = 0; idx < this->size(dim); ++idx) { + coord[dim] = idx; - if (dim < Rank - 1) { - elementwise_visit(op, dim + 1, dst_offset, coord); + if (dim < kRank - 1) { + elementwise_visit(op, dim + 1, coord); } else { - op(ref().data()[dst_offset], coord); + op(this->at(coord), coord); } } } /// Recurses through all dimensions and applies a binary operation - template + template bool elementwise_in_place(F& op, - TensorView const& tensor, + SrcTensorView const& tensor, int dim = 0, - Offset_t dst_offset_base = 0, - Offset_t src_offset_base = 0) { - Offset_t dst_offset = dst_offset_base; - Offset_t src_offset = src_offset_base; + TensorCoord const &start_coord = TensorCoord()) { - if (size().at(dim) != tensor.size().at(dim)) { + if (this->size(dim) != tensor.size(dim)) { return false; } - for (int idx = 0; idx < size(dim); - ++idx, dst_offset += stride(dim), src_offset += tensor.stride(dim)) { - if (dim < Rank - 1) { - elementwise_in_place(op, tensor, dim + 1, dst_offset, src_offset); + TensorCoord coord(start_coord); + for (int idx = 0; idx < this->size(dim); ++idx) { + coord[dim] = idx; + if (dim < kRank - 1) { + elementwise_in_place(op, tensor, dim + 1, coord); } else { - op(data()[dst_offset], tensor.data()[src_offset]); + op(this->at(coord), tensor.at(coord)); } } @@ -252,55 +264,55 @@ class HostTensorView : public TensorView { template struct LambdaBinaryAddition { - void operator()(T& a, Src b) const { a += T(b); } + void operator()(Storage_& a, Src b) const { a += Storage_(b); } }; template struct LambdaBinarySubtraction { - void operator()(T& a, Src b) const { a -= T(b); } + void operator()(Storage_& a, 
Src b) const { a -= Storage_(b); } }; template struct LambdaBinaryMultiplication { - void operator()(T& a, Src b) const { a *= T(b); } + void operator()(Storage_& a, Src b) const { a *= Storage_(b); } }; template struct LambdaBinaryDivision { - void operator()(T& a, Src b) const { a /= T(b); } + void operator()(Storage_& a, Src b) const { a /= Storage_(b); } }; /// Accumulate in place - template - TensorView& operator+=(TensorView const& tensor) { - LambdaBinaryAddition op; + template + HostTensorView& operator+=(SrcTensorView const& tensor) { + LambdaBinaryAddition op; elementwise_in_place(op, tensor); return *this; } /// Subtract in place - template - TensorView& operator-=(TensorView const& tensor) { - LambdaBinarySubtraction op; + template + HostTensorView& operator-=(SrcTensorView const& tensor) { + LambdaBinarySubtraction op; elementwise_in_place(op, tensor); return *this; } /// Multiply in place - template - TensorView& operator*=(TensorView const& tensor) { - LambdaBinaryMultiplication op; + template + HostTensorView& operator*=(SrcTensorView const& tensor) { + LambdaBinaryMultiplication op; elementwise_in_place(op, tensor); return *this; } /// Divide in place - template - TensorView& operator/=(TensorView const& tensor) { - LambdaBinaryDivision op; + template + HostTensorView& operator/=(SrcTensorView const& tensor) { + LambdaBinaryDivision op; elementwise_in_place(op, tensor); return *this; @@ -309,19 +321,19 @@ class HostTensorView : public TensorView { /// Comparison operator struct EqualsOperator { bool equal; - T eps; + Storage_ eps; - EqualsOperator(T _epsilon) : equal(true), eps(_epsilon) {} + EqualsOperator(Storage_ _epsilon) : equal(true), eps(_epsilon) {} - void operator()(T a, T b) { - if (std::abs(T(a - b)) > eps * std::max(std::abs(a), std::abs(b))) { + void operator()(Storage_ a, Storage_ b) { + if (std::abs(Storage_(a - b)) > eps * std::max(std::abs(a), std::abs(b))) { equal = false; } } }; /// equality with epsilon tolerance - bool 
equals(TensorView const& tensor, T epsilon) const { + bool equals(Base const& tensor, Storage epsilon) const { EqualsOperator comparison_op(epsilon); bool equal_size = elementwise_in_place(comparison_op, tensor); @@ -336,13 +348,13 @@ class HostTensorView : public TensorView { BitEqualsOperator(long long _ulps_threshold) : equal(true), eps(_ulps_threshold), index(0) {} - void operator()(T a, T b) { + void operator()(Storage_ a, Storage_ b) { // convert bits to integers long long bits_a = 0; long long bits_b = 0; - *reinterpret_cast(&bits_a) = TypeTraits::remove_negative_zero(a); - *reinterpret_cast(&bits_b) = TypeTraits::remove_negative_zero(b); + *reinterpret_cast(&bits_a) = TypeTraits::remove_negative_zero(a); + *reinterpret_cast(&bits_b) = TypeTraits::remove_negative_zero(b); // compute diff long long ulps = bits_a - bits_b; @@ -354,85 +366,13 @@ class HostTensorView : public TensorView { }; /// equality with ulps tolerance - bool bit_equals(TensorView const& tensor, long long ulps_threshold = 0) { + bool bit_equals(Base const& tensor, long long ulps_threshold = 0) { BitEqualsOperator comparison_op(ulps_threshold); bool equal_size = elementwise_in_place(comparison_op, tensor); return equal_size && comparison_op.equal; } - /// Gets naked pointer to data - T* data() const { return TensorView_t::data(); } - - /// Computes general matrix product among select dimensions of a tensor - /// Assumes: - /// D: number of independent GEMMs to compute - /// H: height of matrix - /// W: width of matrix - /// C: "channels" of each element - template - void gemm(TensorView const& tensor_a, TensorView const& tensor_b, Stype alpha, Stype beta) { - int const Batch = size(Dim_D); - int const M = size(Dim_H); - int const N = size(Dim_W); - int const K = tensor_a.size(Dim_W); - int const C = tensor_a.size(Dim_C); - - // Sizes must match - if (tensor_a.size(Dim_H) != M || tensor_b.size(Dim_W) != N || tensor_b.size(Dim_C) != C || - tensor_b.size(Dim_H) != K) { - return; - } - - int 
const Mblock = 32; - int const Nblock = 32; - - for (int batch = 0; batch < Batch; ++batch) { - for (int row_block = 0; row_block < M; row_block += Mblock) { - for (int col_block = 0; col_block < N; col_block += Nblock) { - Ctype accum[Mblock][Nblock]; - - for (int j = 0; j < Nblock; j++) { - for (int i = 0; i < Mblock; i++) { - accum[i][j] = Ctype(0); - } - } - - for (int k_block = 0; k_block < K; ++k_block) { - for (int j = 0; j < Nblock; j++) { - for (int i = 0; i < Mblock; i++) { - int row = row_block + i; - int col = col_block + j; - - if (row < M && col < N) { - for (int channel = 0; channel < C; ++channel) { - Ctype a(tensor_a.at(make_Coord(batch, row, k_block, channel))); - Ctype b(tensor_b.at(make_Coord(batch, k_block, col, channel))); - - accum[i][j] += a * b; - } - } - } - } - } - - for (int j = 0; j < Nblock; j++) { - for (int i = 0; i < Mblock; i++) { - int row = row_block + i; - int col = col_block + j; - - Coord_t coord = make_Coord(batch, row, col, 0); - if (row < M && col < N) { - at(coord) = - Cast::apply(alpha * Stype(accum[i][j]) + beta * Stype(at(coord))); - } - } - } - } - } - } - } - /// Fills with random data template void fill_random(Gen generator) { @@ -453,7 +393,9 @@ class HostTensorView : public TensorView { /// Generator to fill a tensor with the identity matrix struct LambdaFillIdentity { - T operator()(Coord_t const& coord) { return (coord.at(1) == coord.at(2) ? T(1) : T(0)); } + Storage_ operator()(TensorCoord const& coord) { + return (coord.at(1) == coord.at(2) ? 
Storage_(1) : Storage_(0)); + } }; /// initializes with identity @@ -464,39 +406,41 @@ class HostTensorView : public TensorView { /// Lambda for fill_linear() struct LambdaFillLinear { - Coord_t v_; - T offset_; + TensorCoord v_; + Storage_ offset_; - LambdaFillLinear(Coord_t const& _v, T _offset) : v_(_v), offset_(_offset) {} + LambdaFillLinear(TensorCoord const& _v, Storage_ _offset) : v_(_v), offset_(_offset) {} - T operator()(Coord_t const& coord) { return T(v_.template dot(coord)) + offset_; } + Storage_ operator()(TensorCoord const& coord) { + return Storage_(v_.template dot(coord)) + offset_; + } }; /// computes elements as a linear combination of their coordinates - void fill_linear(Coord_t v, T offset = T(0)) { + void fill_linear(TensorCoord v, Storage_ offset = Storage_(0)) { LambdaFillLinear lambda(v, offset); elementwise_generate(lambda); } /// computes elements as a linear combination of their coordinates - void fill_sequential(T v = T(1), T offset = T(0)) { - int const count = size().count(); + void fill_sequential(Storage_ v = Storage_(1), Storage_ offset = Storage_(0)) { + int const count = this->size().count(); for (int i = 0; i < count; ++i) { - data()[i] = T(i); + this->data()[i] = Storage_(i); } } /// Returns a constant value struct LambdaFillValue { - T value; + Storage_ value; - LambdaFillValue(T _value) : value(_value) {} + LambdaFillValue(Storage_ _value) : value(_value) {} - T operator()() { return value; } + Storage_ operator()() { return value; } }; /// fills with a value - void fill(T val = T(0)) { + void fill(Storage_ val = Storage_(0)) { LambdaFillValue op(val); elementwise_stream(op); } @@ -504,13 +448,21 @@ class HostTensorView : public TensorView { /// Conversion from Src to T template struct LambdaAssign { - void operator()(T& a, Src b) const { a = T(b); } + void operator()(Storage_& a, Src b) const { a = Storage_(b); } }; /// copies from external data source and performs type conversion - template - void fill(TensorView const& 
tensor) { - LambdaAssign op; + template < + typename SrcType, + typename SrcMapFunc_, + int SrcStorageRank_, + typename SrcIndex_, + typename SrcLongIndex_ + > + void fill( + TensorView const& tensor) { + + LambdaAssign op; elementwise_in_place(op, tensor); } @@ -520,7 +472,7 @@ class HostTensorView : public TensorView { LambdaNorm() : sum(0) {} - void operator()(T const& element) { + void operator()(Storage const& element) { double value(element); double conj(element); // TODO - conjugates for complex @@ -540,3 +492,4 @@ class HostTensorView : public TensorView { //////////////////////////////////////////////////////////////////////////////////////////////////// } // namespace cutlass + diff --git a/tools/util/reference/device/kernel/tensor_elementwise.h b/tools/util/reference/device/kernel/tensor_elementwise.h new file mode 100644 index 000000000..31f7a2d8d --- /dev/null +++ b/tools/util/reference/device/kernel/tensor_elementwise.h @@ -0,0 +1,162 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#pragma once + +#include + +#include "cutlass/cutlass.h" + +namespace cutlass { +namespace reference { +namespace device { +namespace kernel { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Kernel to initialize tensor to uniform random distribution +template +__global__ void TensorInitializeUniform( + Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { + __shared__ curandState_t rng_state[1024]; + + uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; + + curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); + + int c_idx = blockIdx.x * blockDim.x + threadIdx.x; + int s_idx = blockIdx.y * blockDim.x; + + tensor += s_idx * ldm + c_idx; + + for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { + if (s_idx < dim_strided && c_idx < dim_contiguous) { + double range = dist.uniform.max - dist.uniform.min; + + double rnd = curand_uniform(&rng_state[threadIdx.x]); + + rnd = dist.uniform.min + range * rnd; + + // Random values are cast to integer after 
scaling by a power of two to facilitate error + // testing + if (dist.int_scale >= 0) { + rnd = double(int(rnd * double(1 << dist.int_scale))); + *tensor = T(rnd / double(1 << dist.int_scale)); + } else { + *tensor = T(rnd); + } + + tensor += ldm; + } + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Kernel to initialize tensor to uniform distribution +template +__global__ void TensorInitializeGaussian( + Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { + __shared__ curandState_t rng_state[1024]; + + uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; + + curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); + + int c_idx = blockIdx.x * blockDim.x + threadIdx.x; + int s_idx = blockIdx.y * blockDim.x; + + tensor += s_idx * ldm + c_idx; + + for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { + if (s_idx < dim_strided && c_idx < dim_contiguous) { + // Random values are cast to integer after scaling by a power of two to facilitate error + // testing + + double rnd = curand_normal(&rng_state[threadIdx.x]); + + rnd = dist.gaussian.mean + dist.gaussian.stddev * rnd; + + if (dist.int_scale >= 0) { + rnd = double(int(rnd * double(1 << dist.int_scale))); + *tensor = T(rnd / double(1 << dist.int_scale)); + } else { + *tensor = T(rnd); + } + } + } +} + +/// Kernel to initialize tensor to an identity matrix +template +__global__ void TensorInitializeLinear( + Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { + __shared__ curandState_t rng_state[1024]; + + uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; + + curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); + + int c_idx = blockIdx.x * blockDim.x + threadIdx.x; + int s_idx = blockIdx.y * blockDim.x; + + tensor += s_idx * ldm + c_idx; + + for (int s_offset = 0; s_offset < 
blockDim.x; ++s_offset, ++s_idx) { + if (s_idx < dim_strided && c_idx < dim_contiguous) { + *tensor = + dist.linear.offset + dist.linear.delta_row * c_idx + dist.linear.delta_column * s_idx; + } + } +} + +/// Kernel to initialize tensor to an identity matrix +template +__global__ void TensorInitializeIdentity( + Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) { + __shared__ curandState_t rng_state[1024]; + + uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x + blockIdx.y * gridDim.x * blockDim.x; + + curand_init(seed, gtid, 0, &rng_state[threadIdx.x]); + + int c_idx = blockIdx.x * blockDim.x + threadIdx.x; + int s_idx = blockIdx.y * blockDim.x; + + tensor += s_idx * ldm + c_idx; + + for (int s_offset = 0; s_offset < blockDim.x; ++s_offset, ++s_idx) { + if (s_idx < dim_strided && c_idx < dim_contiguous) { + *tensor = (c_idx == s_idx ? T(1) : T(0)); + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace kernel +} // namespace device +} // namespace reference +} // namespace cutlass diff --git a/tools/util/reference/device/kernel/tensor_foreach.h b/tools/util/reference/device/kernel/tensor_foreach.h new file mode 100644 index 000000000..5396d5618 --- /dev/null +++ b/tools/util/reference/device/kernel/tensor_foreach.h @@ -0,0 +1,112 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ + +#pragma once + +#include "cutlass/cutlass.h" +#include "cutlass/coord.h" + +namespace cutlass { +namespace reference { +namespace device { +namespace kernel { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Defines several helpers +namespace detail { + +/// Helper to perform for-each operation +template +struct TensorForEachHelper { + + /// Constructor for general rank + __inline__ __device__ + TensorForEachHelper(Func &func, Coord const &size, Coord &coord, int64_t index) { + + int64_t product = 1; + + CUTLASS_PRAGMA_UNROLL + for (int i = Rank - RankRemaining; i < Rank; ++i) { + product *= size[i]; + } + + coord[Rank - 1 - RankRemaining] = index / product; + int64_t remaining = index % product; + + TensorForEachHelper(func, size, coord, remaining); + } +}; + +/// Helper to perform for-each operation +template +struct TensorForEachHelper { + + /// Constructor for fastest chaning rank + __inline__ __device__ + TensorForEachHelper(Func &func, Coord const &size, Coord &coord, int64_t index) { + + coord[Rank - 1] = index; + + if (coord < size) { + func(coord); + } + } +}; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to perform for-each operation +template +__global__ void TensorForEach(Coord size, Params params = Params()) { + + Func func(params); + + int64_t index = threadIdx.x + blockIdx.x * blockDim.x; + int64_t max_index = 1; + + CUTLASS_PRAGMA_UNROLL + for (int i = 0; i < Rank; ++i) { + max_index *= size[i]; + } + + CUTLASS_PRAGMA_NO_UNROLL + while (index < max_index) { + Coord coord; + + detail::TensorForEachHelper(func, size, coord, index); + index += blockDim.x * gridDim.x; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // 
namespace kernel +} // namespace device +} // namespace reference +} // namespace cutlass + diff --git a/tools/util/reference/device/tensor_elementwise.h b/tools/util/reference/device/tensor_elementwise.h new file mode 100644 index 000000000..2b1eb2487 --- /dev/null +++ b/tools/util/reference/device/tensor_elementwise.h @@ -0,0 +1,772 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +/* \file + \brief Defines device-side elementwise operations on TensorView. Note, the operations defined + in this header are not specialized for any particular data layout and are therefore not + intended to offer the best possible performance. Rather, they are intended to be generic + reference implementations to support the CUTLASS unit tests. +*/ + +#pragma once + +// Standard Library includes +#include +#include +#include +#include +#include + +// CUDA includes +#include +#include + +// Cutlass includes +#include "cutlass/cutlass.h" +#include "tools/util/device_memory.h" +#include "tools/util/distribution.h" +#include "tools/util/type_traits.h" +#include "tools/util/host_tensor.h" +#include "tools/util/reference/device/tensor_foreach.h" + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace reference { +namespace device { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail { + +/// Computes a random uniform distribution +template +struct RandomUniformFunc { + + /// View type + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + /// Parameters structure + struct Params { + + /// View object + View view; + + /// RNG seed + int64_t seed; + + /// Distriubtion + Distribution dist; + + /// Default ctor + CUTLASS_HOST_DEVICE + Params() { } + + /// Constructor + CUTLASS_HOST_DEVICE + Params( + View const &view, + int64_t seed, + Distribution dist + ): view(view), seed(seed), dist(dist) { } + }; + + // + // Data members + // + + /// Parameters object + Params params; + + /// RNG state object + curandState_t rng_state; + + // + // Methods + // + + /// Device-side initialization of RNG + 
CUTLASS_DEVICE + RandomUniformFunc(Params const ¶ms): params(params) { + + uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x; + + curand_init(params.seed, gtid, 0, &rng_state); + } + + /// Compute random value and update RNG state + CUTLASS_DEVICE + void operator()(TensorCoord const &coord) { + + double range = params.dist.uniform.max - params.dist.uniform.min; + double rnd = curand_uniform(&rng_state); + rnd = params.dist.uniform.min + range * rnd; + + // Random values are cast to integer after scaling by a power of two to facilitate error + // testing + T result; + if (params.dist.int_scale >= 0) { + rnd = double(int(rnd * double(1 << params.dist.int_scale))); + result = T(rnd / double(1 << params.dist.int_scale)); + } + else { + result = T(rnd); + } + + params.view.at(coord) = result; + } +}; + +/// Computes a random Gaussian distribution +template +struct RandomGaussianFunc { + + /// View type + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + /// Parameters structure + struct Params { + + /// View object + View view; + + /// RNG seed + int64_t seed; + + /// RNG distribution + Distribution dist; + + /// Default ctor + CUTLASS_HOST_DEVICE + Params() { } + + /// Constructor + CUTLASS_HOST_DEVICE + Params( + View const &view, + int64_t seed, + Distribution dist + ): view(view), seed(seed), dist(dist) { } + }; + + // + // Data members + // + + /// Parameters object + Params params; + + /// RNG state object + curandState_t rng_state; + + // + // Methods + // + + /// Device-side initialization of RNG + CUTLASS_DEVICE + RandomGaussianFunc(Params const ¶ms): params(params) { + + uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x; + + curand_init(params.seed, gtid, 0, &rng_state); + } + + /// Compute random value and update RNG state + CUTLASS_DEVICE + void operator()(TensorCoord const &coord) { + + double rnd = curand_normal(&rng_state); + 
rnd = params.dist.gaussian.mean + params.dist.gaussian.stddev * rnd; + + T result; + if (params.dist.int_scale >= 0) { + rnd = double(int(rnd * double(1 << params.dist.int_scale))); + result = T(rnd / double(1 << params.dist.int_scale)); + } + else { + result = T(rnd); + } + + params.view.at(coord) = result; + } +}; + +/// Computes a linear combination of each element +template +struct LinearCombinationFunc { + + /// View type + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + // + // Data members + // + + /// TensorView object + View view; + + /// Delta + Coord delta; + + /// Offset + double offset; + + // + // Methods + // + + /// Constructor + CUTLASS_HOST_DEVICE + LinearCombinationFunc( + View const &view, + Distribution dist + ): view(view) { + + offset = dist.linear.offset; + if (View::kRank >= 1) { + delta[View::kRank - 1] = dist.linear.delta_column; + } + if (View::kRank >= 2) { + delta[View::kRank - 2] = dist.linear.delta_row; + } + // Additional ranks have delta of zero + for (int i = View::kRank - 2; i > 0; --i) { + delta[i - 1] = 0; + } + } + + /// Compute linear combination + CUTLASS_HOST_DEVICE + void operator()(TensorCoord const &coord) { + double result = offset; + CUTLASS_PRAGMA_UNROLL + for (int i = 0; i < View::kRank; ++i) { + result += delta[i] * double(coord[i]); + } + view.at(coord) = T(result); + } +}; + +/// Returns 1 or 0 if the coordinate is along the tensor's diagonal +template +struct IdentityFunc { + + /// TensorView + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + // + // Data members + // + + /// View object + View view; + + /// Default ctor + CUTLASS_HOST_DEVICE + IdentityFunc(View const &view): view(view) { } + + CUTLASS_HOST_DEVICE + void operator()(TensorCoord const &coord) { + bool 
equal = true; + CUTLASS_PRAGMA_UNROLL + for (int i = 0; i < View::kRank; ++i) { + if (coord[i] != coord[0]) { + equal = false; + } + } + view.at(coord) = equal ? T(1) : T(0); + } +}; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Initializes a tensor randomly or procedurally. +template +void TensorInitialize(View const &view, + int64_t seed, + Distribution const &dist) { + + typedef typename View::Storage Scalar; + + switch (dist.kind) { + case Distribution::Uniform: + { + typedef detail::RandomUniformFunc Func; + typedef typename Func::Params Params; + + TensorForEach( + view.size(), + Params(view, seed, dist) + ); + } + break; + case Distribution::Gaussian: + { + typedef detail::RandomGaussianFunc Func; + typedef typename Func::Params Params; + + TensorForEach( + view.size(), + Params(view, seed, dist) + ); + } + break; + case Distribution::Linear: + { + typedef detail::LinearCombinationFunc Func; + TensorForEach( + view.size(), + Func(view, dist)); + } + break; + case Distribution::Identity: + { + typedef detail::IdentityFunc Func; + + Func func(view); + + TensorForEach(view.size(), func); + } + break; + default: + break; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace device +} // namespace reference + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Dispatcher to appropriate initialization kernel - preserved for backwards compatibility +template +inline void tensor_initialize(Distribution const &dist, + int64_t seed, + int dim_contiguous, + int dim_strided, + T *tensor, + int ldm) { + + TensorView view(tensor, make_Coord(ldm, 1), make_Coord(dim_strided, dim_contiguous)); + reference::device::TensorInitialize(view, seed, dist); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + 
+namespace reference { +namespace device { +namespace detail { + +/// Compares two tensor views of equal rank and dimension. +template +struct TensorEqualsFunc { + + /// Storage type + typedef typename ViewL::Storage T; + + /// Unsigned integer type of same size as View type + typedef typename cutlass::TypeTraits::unsigned_type UnsignedType; + + /// Coordinate in tensor's index space + typedef typename ViewL::TensorCoord TensorCoord; + + /// Assertions + static_assert(ViewL::kRank == ViewR::kRank, + "Cannot compare tensors of different rank"); + + // + // Data members + // + + /// View of left-hand-side tensor + ViewL lhs; + + /// View of right-hand-side tensor + ViewR rhs; + + /// Pointer to result scalar - only written with 0 if values are incorrect + int *result; + + // + // Methods + // + + /// Constructor + CUTLASS_HOST_DEVICE + TensorEqualsFunc(ViewL const &lhs, ViewR const &rhs, int *result): lhs(lhs), rhs(rhs), result(result) { } + + /// Equality check + CUTLASS_HOST_DEVICE + void operator()(TensorCoord const &coord) { + UnsignedType _lhs = reinterpret_cast(lhs.at(coord)); + UnsignedType _rhs = reinterpret_cast(rhs.at(coord)); + if (_lhs != _rhs) { + *result = 0; + } + } +}; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Returns true if two tensor views are equal. 
+template +bool TensorEquals(ViewL const &lhs, ViewR const &rhs) { + + // Sizes must be identical + if (lhs.size() != rhs.size()) { + return false; + } + + // Allocate device memory to contain result of kernel reduction + HostTensor result(1); + result.fill(1); + result.sync_device(); + + typedef detail::TensorEqualsFunc Func; + Func func(lhs, rhs, result.device_data()); + + TensorForEach(lhs.size(), func); + result.sync_host(); + + return result.at(0) != 0; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to apply a binary operator in place +template +struct TensorFuncBinaryOp { + + /// Coordinate in tensor's index space + typedef typename ViewL::TensorCoord TensorCoord; + + // + // Data members + // + + /// View of left-hand-side tensor + ViewL lhs; + + /// View of right-hand-side tensor + ViewR rhs; + + /// Binary function applied to each element + BinaryFunc func; + + // + // Methods + // + + /// Constructor + CUTLASS_HOST_DEVICE + TensorFuncBinaryOp( + ViewL const &lhs, + ViewR const &rhs, + BinaryFunc func = BinaryFunc()): lhs(lhs), rhs(rhs), func(func) { } + + /// Equality check + CUTLASS_HOST_DEVICE + void operator()(TensorCoord const &coord) { + lhs.at(coord) = func(lhs.at(coord), rhs.at(coord)); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail { + +/// Helper to apply a binary operator in place +template +struct TensorFillFunc { + + /// Coordinate in tensor's index space + typedef typename ViewL::TensorCoord TensorCoord; + + /// Destination element type + typedef typename ViewL::Storage DestType; + + /// Source element type + typedef typename ViewR::Storage SrcType; + + /// Parameters object + struct Params { + + // + // Data members + // + + /// View of left-hand-side tensor + ViewL lhs; + + /// View of right-hand-side tensor + ViewR rhs; + + /// Source offset coordinate + TensorCoord source_offset; + + 
/// Size of the subtensor copied from the source + TensorCoord source_size; + + /// Offset in destination + TensorCoord dest_offset; + + // + // Methods + // + + /// Constructs a parameters object for filling a tensor + Params( + ViewL const &lhs, + ViewR const &rhs, + TensorCoord const &source_offset = TensorCoord() + ): + lhs(lhs), rhs(rhs), source_offset(source_offset), source_size(rhs.size() - source_offset) { } + + /// Constructs a parameters object for filling a tensor + Params( + ViewL const &lhs, + ViewR const &rhs, + TensorCoord const &source_offset, + TensorCoord const &source_size, + TensorCoord const &dest_offset = TensorCoord() + ): + lhs(lhs), rhs(rhs), source_offset(source_offset), source_size(source_size), dest_offset(dest_offset) { } + }; + + // + // Data members + // + + Params params; + + // + // Methods + // + + /// Constructor + CUTLASS_HOST_DEVICE + TensorFillFunc( + Params const ¶ms): params(params) { } + + /// Equality check + CUTLASS_HOST_DEVICE + void operator()(TensorCoord const &coord) { + + TensorCoord dst_coord = params.dest_offset + coord; + TensorCoord src_coord = params.source_offset + coord; + + if (dst_coord < params.lhs.size() && src_coord < params.rhs.size()) { + params.lhs.at(dst_coord) = DestType(params.rhs.at(src_coord)); + } + } +}; + +} // namespace detail + +/// Fills a TensorView with the elements from another TensorView +template +void TensorFill( + ViewL lhs, + ViewR rhs, + typename ViewL::TensorCoord const &source_offset, + typename ViewL::TensorCoord const &source_size, + typename ViewL::TensorCoord const &dest_offset) { + + typedef typename ViewL::TensorCoord TensorCoord; + + TensorCoord dst_size = lhs.size() - dest_offset; + TensorCoord src_size = rhs.size() - source_offset; + + TensorCoord fill_size = dst_size.clamp(src_size); + + // Fill function + typedef detail::TensorFillFunc Func; + typedef typename Func::Params Params; + + Params params(lhs, rhs, source_offset, source_size, dest_offset); + + 
TensorForEach(fill_size, params); +} + +/// Fills a TensorView with the elements from another TensorView +template +void TensorFill( + ViewL lhs, + ViewR rhs, + typename ViewL::TensorCoord const &source_offset = typename ViewL::TensorCoord()) { + + typedef typename ViewL::TensorCoord TensorCoord; + + TensorFill(lhs, rhs, source_offset, rhs.size(), TensorCoord()); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail { + +/// Helper to apply a binary operator in place +template +struct TensorFillElementFunc { + + /// Coordinate in tensor's index space + typedef typename ViewL::TensorCoord TensorCoord; + + /// Destination element type + typedef typename ViewL::Storage DestType; + + /// Parameters object + struct Params { + + // + // Data members + // + + /// View of left-hand-side tensor + ViewL lhs; + + /// Source offset coordinate + TensorCoord offset; + + /// Element to overwrite with + DestType value; + + // + // Methods + // + + /// Constructs a parameters object for filling a tensor + CUTLASS_HOST_DEVICE + Params( + ViewL const &lhs, + DestType const &value, + TensorCoord const &offset = TensorCoord() + ): + lhs(lhs), value(value), offset(offset) { } + }; + + // + // Data members + // + + Params params; + + // + // Methods + // + + /// Constructor + CUTLASS_HOST_DEVICE + TensorFillElementFunc( + Params const ¶ms): params(params) { } + + /// Equality check + CUTLASS_HOST_DEVICE + void operator()(TensorCoord const &coord) { + + TensorCoord dst_coord = params.offset + coord; + + if (dst_coord < params.size) { + params.lhs.at(dst_coord) = params.value; + } + } +}; + +} // namespace detail + +/// Method to perform the actual fill +template +void TensorFillElement( + ViewL const &lhs, + typename ViewL::Storage const &value, + typename ViewL::TensorCoord const &offset, + typename ViewL::TensorCoord const &size) { + + // Fill function + typedef detail::TensorFillElementFunc Func; + typedef typename 
Func::Params Params; + + Params params(lhs, value, offset); + + TensorForEach(size, params); +} + +/// Fills a tensor +template +void TensorFillElement( + ViewL lhs, + typename ViewL::Storage value, + typename ViewL::TensorCoord const &offset =typename ViewL::Storage()) { + + TensorFillElement(lhs, value, offset, lhs.size() - offset); +} + +/// Constructs a parameters object for filling a tensor +template +void TensorFillElement( + ViewL lhs, + typename ViewL::Storage value, + typename ViewL::Storage const &offset, + typename ViewL::Storage const &size) { + + TensorFillElement(lhs, value, offset, size); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace device +} // namespace reference +} // namespace cutlass + diff --git a/tools/util/reference/device/tensor_foreach.h b/tools/util/reference/device/tensor_foreach.h new file mode 100644 index 000000000..1c3a72a6c --- /dev/null +++ b/tools/util/reference/device/tensor_foreach.h @@ -0,0 +1,72 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#pragma once + +#include +#include "cutlass/cutlass.h" +#include "tools/util/reference/device/kernel/tensor_foreach.h" + +namespace cutlass { +namespace reference { +namespace device { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Launches a kernel for each element in a tensor's index space. +template +struct TensorForEach { + + /// Constructor performs the operation. + TensorForEach(Coord size, Params params = Params(), int grid_size = 0, int block_size = 0) { + + if (!grid_size || !block_size) { + + // if grid_size or block_size are zero, query occupancy using the CUDA Occupancy API + cudaError_t result = cudaOccupancyMaxPotentialBlockSize( + &grid_size, + &block_size, + reinterpret_cast(kernel::TensorForEach)); + + if (result != cudaSuccess) { + throw std::runtime_error("Failed to query occupancy."); + } + + // Limit block size. This has the effect of increasing the number of items processed by a + // single thread and reduces the impact of initialization overhead. + block_size = (block_size < 128 ? 
block_size : 128); + } + + dim3 grid(grid_size, 1, 1); + dim3 block(block_size, 1, 1); + + kernel::TensorForEach<<< grid, block >>>(size, params); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace device +} // namespace reference +} // namespace cutlass diff --git a/tools/util/reference/host/gemm.h b/tools/util/reference/host/gemm.h new file mode 100644 index 000000000..dd20532cc --- /dev/null +++ b/tools/util/reference/host/gemm.h @@ -0,0 +1,270 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Reference implementation for GEMM in host-side code. +*/ + +#pragma once + +#include "cutlass/coord.h" +#include "cutlass/matrix_traits.h" +#include "cutlass/tensor_view.h" +#include "cutlass/gemm/gemm_coord.h" + +namespace cutlass { +namespace reference { +namespace host { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail { + +/// Template function to compute an inner product. 
+template +Ctype inner_product(Atype a, Btype b, Ctype c) { + return Ctype(a) * Ctype(b) + c; +} + +/// Specialization for matrix multiplication with binary operands +template <> +inline int inner_product, Vector, int>( + Vector a, + Vector b, + int c) { + + int accum = 0; + for (int bit = 0; bit < 32; bit++) { + accum += a[bit] ^ b[bit]; + } + return accum + c; +} + +/// Specialization for matrix multiplication with signed 4-bit integer operands +template <> inline +int inner_product, Vector, int>( + Vector a, + Vector b, + int c) { + + int accum = 0; + for (int k = 0; k < 8; k++) { + accum += a[k] * b[k]; + } + return accum + c; +} + +/// Specialization for matrix multiplication with unsigned 4-bit integer operands +template <> inline +int inner_product, Vector, int>( + Vector a, + Vector b, + int c) { + + int accum = 0; + for (int k = 0; k < 8; k++) { + accum += a[k] * b[k]; + } + return accum + c; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Cast { + // Default behavior: convert to the destination type + static inline DstType apply(SrcType src) { return static_cast(src); }; +}; + +template <> +struct Cast { + static inline int8_t apply(float src) { + // Clamp to the range of signed 8-bit integers. + return static_cast(fmaxf(-128.f, fminf(127.f, src))); + }; +}; + +template <> +struct Cast { + static inline uint8_t apply(float src) { + // Clamp to the range of signed 8-bit integers. + return static_cast(fmaxf(0.f, fminf(255.f, src))); + }; +}; + +} // namespace detail + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef +/// objects. +/// +/// Explicitly naming types needed by this template can be cumbersome, particularly for the +/// accumulator type, so a function argument 'initial_accum' is exposed. 
Passing +/// AccumulatorType(0) as the last function argument can be easier than naming all template +/// arguments explicitly. +template < + typename TensorRefA, + typename TensorRefB, + typename TensorRefC, + typename ScalarType, + typename AccumulatorType +> +void Gemm( + gemm::GemmCoord problem_size, + ScalarType alpha, + TensorRefA tensor_a, + TensorRefB tensor_b, + ScalarType beta, + TensorRefC tensor_c, + AccumulatorType initial_accum) { + + typedef typename TensorRefA::Storage AType; + typedef typename TensorRefB::Storage BType; + typedef typename TensorRefC::Storage CType; + + static_assert( + TensorRefA::kRank == 2 && + TensorRefB::kRank == 2 && + TensorRefC::kRank == 2, "Tensors must be of rank 2"); + + // Note: batch is ignored. + int const M = problem_size.m(); + int const N = problem_size.n(); + int const K = problem_size.k(); + + // Blocking necessary to speedup reference implementation + int const Mblock = 32; + int const Nblock = 32; + + for (int row_block = 0; row_block < M; row_block += Mblock) { + for (int col_block = 0; col_block < N; col_block += Nblock) { + AccumulatorType accum[Mblock][Nblock]; + + for (int j = 0; j < Nblock; j++) { + for (int i = 0; i < Mblock; i++) { + accum[i][j] = initial_accum; + } + } + + for (int k_block = 0; k_block < K; ++k_block) { + for (int j = 0; j < Nblock; j++) { + for (int i = 0; i < Mblock; i++) { + int row = row_block + i; + int col = col_block + j; + + if (row < M && col < N) { + AType a = tensor_a.at(MatrixCoord(row, k_block)); + BType b = tensor_b.at(MatrixCoord(k_block, col)); + + accum[i][j] = detail::inner_product(a, b, accum[i][j]); + } + } + } + } + + for (int j = 0; j < Nblock; j++) { + for (int i = 0; i < Mblock; i++) { + int row = row_block + i; + int col = col_block + j; + + MatrixCoord coord = MatrixCoord(row, col); + if (row < M && col < N) { + + tensor_c.at(coord) = detail::Cast::apply( + alpha * ScalarType(accum[i][j]) + + beta * ScalarType(tensor_c.at(coord))); + } + } + } + } + } +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef +/// objects. +/// +/// This assumes the accumulator type is the same type as the scalars. +template < + typename TensorRefA, + typename TensorRefB, + typename TensorRefC, + typename ScalarType +> +void Gemm( + gemm::GemmCoord problem_size, + ScalarType alpha, + TensorRefA tensor_a, + TensorRefB tensor_b, + ScalarType beta, + TensorRefC tensor_c) { + + Gemm(problem_size, alpha, tensor_a, tensor_b, beta, tensor_c, ScalarType(0)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Computes a batch of GEMMs over a set of matrices of common dimension. +template < + typename TensorRefCollectionA, + typename TensorRefCollectionB, + typename TensorRefCollectionC, + typename ScalarType, + typename AccumulatorType +> +void BatchGemm( + gemm::GemmCoord problem_size, + ScalarType alpha, + TensorRefCollectionA const& tensor_a, + TensorRefCollectionB const& tensor_b, + ScalarType beta, + TensorRefCollectionC &tensor_c, + AccumulatorType initial_accum = AccumulatorType(0)) { + + typename TensorRefCollectionA::ConstIterator tensor_a_it = tensor_a.begin(); + typename TensorRefCollectionB::ConstIterator tensor_b_it = tensor_b.begin(); + typename TensorRefCollectionC::ConstIterator tensor_c_it = tensor_c.begin(); + + for (int batch = 0; + batch < problem_size.batch(); + ++batch, ++tensor_a_it, ++tensor_b_it, ++tensor_c_it) { + + Gemm( + problem_size, + alpha, + *tensor_a_it, + *tensor_b_it, + beta, + *tensor_c_it, + initial_accum); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace host +} // namespace reference +} // namespace cutlass diff --git a/tools/util/reference/host/tensor_elementwise.h b/tools/util/reference/host/tensor_elementwise.h new 
file mode 100644 index 000000000..88f46bcdf --- /dev/null +++ b/tools/util/reference/host/tensor_elementwise.h @@ -0,0 +1,478 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/* \file + \brief Defines host-side elementwise operations on TensorView. 
+*/ + +#pragma once + +// Standard Library includes +#include +#include +#include +#include +#include +#include +#include + +// Cutlass includes +#include "cutlass/cutlass.h" +#include "tools/util/distribution.h" +#include "tools/util/type_traits.h" +#include "tools/util/reference/host/tensor_foreach.h" + +namespace cutlass { +namespace reference { +namespace host { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail { + +/// Computes a random uniform distribution +template +struct RandomUniformFunc { + + /// View type + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + /// Parameters structure + struct Params { + + /// View object + View view; + + /// RNG seed + unsigned seed; + + /// Distriubtion + Distribution dist; + + /// Default ctor + Params() { } + + /// Constructor + Params( + View const &view, + unsigned seed, + Distribution dist + ): view(view), seed(seed), dist(dist) { } + }; + + // + // Data members + // + + /// Parameters object + Params params; + + // + // Methods + // + + /// Device-side initialization of RNG + RandomUniformFunc(Params const ¶ms): params(params) { + std::srand(params.seed); + } + + /// Compute random value and update RNG state + void operator()(TensorCoord const &coord) { + + double range = params.dist.uniform.max - params.dist.uniform.min; + + double rnd = double(std::rand()) / double(RAND_MAX); + + rnd = params.dist.uniform.min + range * rnd; + + // Random values are cast to integer after scaling by a power of two to facilitate error + // testing + T result; + if (params.dist.int_scale >= 0) { + rnd = double(int(rnd * double(1 << params.dist.int_scale))); + result = T(rnd / double(1 << params.dist.int_scale)); + } + else { + result = T(rnd); + } + + params.view.at(coord) = result; + } +}; + +/// Computes a random Gaussian distribution 
+template +struct RandomGaussianFunc { + + /// View type + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + /// Parameters structure + struct Params { + + /// View object + View view; + + /// RNG seed + unsigned seed; + + /// RNG distribution + Distribution dist; + + /// Default ctor + Params() { } + + /// Constructor + Params( + View const &view, + unsigned seed, + Distribution dist + ): view(view), seed(seed), dist(dist) { } + }; + + // + // Data members + // + + /// Parameters object + Params params; + + /// Constant PI + double pi; + + // + // Methods + // + + /// Device-side initialization of RNG + RandomGaussianFunc(Params const ¶ms): params(params) { + pi = std::acos(-1); + } + + /// Compute random value and update RNG state + void operator()(TensorCoord const &coord) { + + // Box-Muller transform to generate random numbers with Normal distribution + double u1 = double(std::rand()) / double(RAND_MAX); + double u2 = double(std::rand()) / double(RAND_MAX); + + double rnd = std::sqrt(-2 * std::log(u1)) * std::cos(2 * pi * u2); + + // Scale according to Gaussian distribution parameters + rnd = params.dist.gaussian.mean + params.dist.gaussian.stddev * rnd; + + T result; + if (params.dist.int_scale >= 0) { + rnd = double(int(rnd * double(1 << params.dist.int_scale))); + result = T(rnd / double(1 << params.dist.int_scale)); + } + else { + result = T(rnd); + } + + params.view.at(coord) = result; + } +}; + +/// Computes a linear combination of each element +template +struct LinearCombinationFunc { + + /// View type + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + // + // Data members + // + + /// TensorView object + View view; + + /// Delta + Coord delta; + + /// Offset + double offset; + + // + // Methods + // + + /// 
Constructor + LinearCombinationFunc( + View const &view, + Distribution dist + ): view(view) { + + offset = dist.linear.offset; + if (View::kRank >= 1) { + delta[View::kRank - 1] = dist.linear.delta_column; + } + if (View::kRank >= 2) { + delta[View::kRank - 2] = dist.linear.delta_row; + } + // Additional ranks have delta of zero + for (int i = View::kRank - 2; i > 0; --i) { + delta[i - 1] = 0; + } + } + + /// Compute linear combination + void operator()(TensorCoord const &coord) { + double result = offset; + + for (int i = 0; i < View::kRank; ++i) { + result += delta[i] * double(coord[i]); + } + view.at(coord) = T(result); + } +}; + +/// Returns 1 or 0 if the coordinate is along the tensor's diagonal +template +struct IdentityFunc { + + /// TensorView + typedef View_ View; + + /// Scalar type + typedef typename View::Storage T; + + /// Coordinate in tensor's index space + typedef typename View::TensorCoord TensorCoord; + + // + // Data members + // + + /// View object + View view; + + /// Default ctor + IdentityFunc(View const &view): view(view) { } + + /// Computes an identity + void operator()(TensorCoord const &coord) { + bool equal = true; + for (int i = 0; i < View::kRank; ++i) { + if (coord[i] != coord[0]) { + equal = false; + } + } + view.at(coord) = equal ? T(1) : T(0); + } +}; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Initializes a tensor randomly or procedurally. 
+template +void TensorInitialize(View const &view, + unsigned seed, + Distribution const &dist) { + + typedef typename View::Storage Scalar; + + switch (dist.kind) { + case Distribution::Uniform: + { + typedef detail::RandomUniformFunc Func; + typedef typename Func::Params Params; + + TensorForEach( + view.size(), + Params(view, seed, dist) + ); + } + break; + case Distribution::Gaussian: + { + typedef detail::RandomGaussianFunc Func; + typedef typename Func::Params Params; + + TensorForEach( + view.size(), + Params(view, seed, dist) + ); + } + break; + case Distribution::Linear: + { + typedef detail::LinearCombinationFunc Func; + TensorForEach( + view.size(), + Func(view, dist)); + } + break; + case Distribution::Identity: + { + typedef detail::IdentityFunc Func; + + Func func(view); + + TensorForEach(view.size(), func); + } + break; + default: + break; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail { + +/// Compares two tensor views of equal rank and dimension. 
+template +struct TensorEqualsFunc { + + /// Storage type + typedef typename ViewL::Storage T; + + /// Unsigned integer type of same size as View type + typedef typename cutlass::TypeTraits::unsigned_type UnsignedType; + + /// Coordinate in tensor's index space + typedef typename ViewL::TensorCoord TensorCoord; + + /// Assertions + static_assert(ViewL::kRank == ViewR::kRank, + "Cannot compare tensors of different rank"); + + // + // Data members + // + + /// View of left-hand-side tensor + ViewL lhs; + + /// View of right-hand-side tensor + ViewR rhs; + + /// Pointer to result scalar - only written with 0 if values are incorrect + int *result; + + // + // Methods + // + + /// Constructor + TensorEqualsFunc(ViewL const &lhs, ViewR const &rhs, int *result): lhs(lhs), rhs(rhs), result(result) { } + + /// Equality check + void operator()(TensorCoord const &coord) { + UnsignedType _lhs = reinterpret_cast(lhs.at(coord)); + UnsignedType _rhs = reinterpret_cast(rhs.at(coord)); + if (_lhs != _rhs) { + *result = 0; + } + } +}; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Returns true if two tensor views are equal. 
+template +bool TensorEquals(ViewL const &lhs, ViewR const &rhs) { + + // Sizes must be identical + if (lhs.size() != rhs.size()) { + return false; + } + + int result = 1; + + typedef detail::TensorEqualsFunc Func; + Func func(lhs, rhs, &result); + + TensorForEach(lhs.size(), func); + + return result != 0; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to apply a binary operator in place +template +struct TensorFuncBinaryOp { + + /// Coordinate in tensor's index space + typedef typename ViewL::TensorCoord TensorCoord; + + // + // Data members + // + + /// View of left-hand-side tensor + ViewL lhs; + + /// View of right-hand-side tensor + ViewR rhs; + + /// Binary function applied to each element + BinaryFunc func; + + // + // Methods + // + + /// Constructor + TensorFuncBinaryOp( + ViewL const &lhs, + ViewR const &rhs, + BinaryFunc func = BinaryFunc()): lhs(lhs), rhs(rhs), func(func) { } + + /// Equality check + void operator()(TensorCoord const &coord) { + lhs.at(coord) = func(lhs.at(coord), rhs.at(coord)); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace host +} // namespace reference +} // namespace cutlass diff --git a/tools/util/reference/host/tensor_foreach.h b/tools/util/reference/host/tensor_foreach.h new file mode 100644 index 000000000..bd4455693 --- /dev/null +++ b/tools/util/reference/host/tensor_foreach.h @@ -0,0 +1,102 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + **************************************************************************************************/ +#pragma once + +#include +#include "cutlass/cutlass.h" +#include "tools/util/reference/device/kernel/tensor_foreach.h" + +namespace cutlass { +namespace reference { +namespace host { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Defines several helpers +namespace detail { + +/// Helper to perform for-each operation +template +struct TensorForEachHelper { + + /// Index of the active rank + static int const kActiveRank = Rank - RankRemaining - 1; + + /// Constructor for general rank + TensorForEachHelper( + Func &func, + Coord const &size, + Coord &coord) { + + for (int i = 0; i < size.at(kActiveRank); ++i) { + coord[kActiveRank] = i; + TensorForEachHelper(func, size, coord); + } + } +}; + +/// Helper to perform for-each operation + template +struct TensorForEachHelper { + + /// Index of the active rank + static int const kActiveRank = Rank - 1; + + /// Constructor for fastest changing rank + TensorForEachHelper( + Func &func, + Coord const &size, + Coord &coord) { + + for (int i = 0; i < size.at(kActiveRank); ++i) { + coord[kActiveRank] = i; + func(coord); + } + } +}; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Iterates over the index space of a tensor +template +struct TensorForEach { + + /// Constructor performs the operation. 
+ TensorForEach(Coord size, Params params = Params()) { + + Func func(params); + Coord coord; + + detail::TensorForEachHelper(func, size, coord); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace host +} // namespace reference +} // namespace cutlass diff --git a/tools/util/tensor_view_io.h b/tools/util/tensor_view_io.h index 6e9cd6bcf..c1b954eae 100644 --- a/tools/util/tensor_view_io.h +++ b/tools/util/tensor_view_io.h @@ -24,38 +24,135 @@ **************************************************************************************************/ #pragma once -#include -#include +#include "cutlass/core_io.h" +#include "cutlass/tensor_view.h" -template -inline std::ostream& tensor_view_output(std::ostream& out, T t) { - out << t; - return out; -} +namespace cutlass { -template <> -inline std::ostream& tensor_view_output(std::ostream& out, int8_t t) { - out << int(t); - return out; -} +/////////////////////////////////////////////////////////////////////////////////////////////////// -template -inline std::ostream& operator<<(std::ostream& out, cutlass::TensorView const& tensor) { - for (int batch = 0; batch < tensor.size(0); ++batch) { - out << "[\n "; - for (int h = 0; h < tensor.size(1); ++h) { - for (int w = 0; w < tensor.size(2); ++w) { - for (int c = 0; c < tensor.size(3); ++c) { - out << ((c | w) ? 
", " : ""); - tensor_view_output(out, tensor.at(cutlass::make_Coord(batch, h, w, c))); - } - } - if (h + 1 < tensor.size(1)) { - out << " ;\n "; - } +namespace detail { + +/// Helper to write the least significant rank of a TensorView +template < + typename Storage_, + int Rank_, + typename MapFunc_, + int StorageRank_, + typename Index_, + typename LongIndex_ +> +inline std::ostream & TensorView_WriteLeastSignificantRank( + std::ostream& out, + cutlass::TensorView< + Storage_, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> const& tensor, + cutlass::Coord const &start_coord, + int rank, + std::streamsize width) { + + for (int idx = 0; idx < tensor.size(rank); ++idx) { + + Coord coord(start_coord); + coord[rank] = idx; + + if (idx) { + out.width(0); + out << ", "; } - out << " ]"; + if (idx || coord) { + out.width(width); + } + out << ScalarIO(tensor.at(coord)); } return out; } + +/// Helper to write a rank of a TensorView +template < + typename Storage_, + int Rank_, + typename MapFunc_, + int StorageRank_, + typename Index_, + typename LongIndex_ +> +inline std::ostream & TensorView_WriteRank( + std::ostream& out, + cutlass::TensorView< + Storage_, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> const& tensor, + cutlass::Coord const &start_coord, + int rank, + std::streamsize width) { + + // If called on the least significant rank, write the result as a row + if (rank + 1 == Rank_) { + return TensorView_WriteLeastSignificantRank(out, tensor, start_coord, rank, width); + } + + // Otherwise, write a sequence of rows and newlines + for (int idx = 0; idx < tensor.size(rank); ++idx) { + + Coord coord(start_coord); + coord[rank] = idx; + + if (rank + 2 == Rank_) { + // Write least significant ranks asa matrix with rows delimited by ";\n" + out << (idx ? ";\n" : ""); + TensorView_WriteLeastSignificantRank(out, tensor, coord, rank + 1, width); + } + else { + // Higher ranks are separated by newlines + out << (idx ? 
"\n" : ""); + TensorView_WriteRank(out, tensor, coord, rank + 1, width); + } + } + + return out; +} + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Prints human-readable representation of a TensorView to an ostream +template < + typename Storage_, + int Rank_, + typename MapFunc_, + int StorageRank_, + typename Index_, + typename LongIndex_ +> +inline std::ostream& operator<<( + std::ostream& out, + TensorView< + Storage_, + Rank_, + MapFunc_, + StorageRank_, + Index_, + LongIndex_> const& tensor) { + + // Prints a TensorView according to the following conventions: + // - least significant rank is printed as rows separated by ";\n" + // - all greater ranks are delimited with newlines + // + // The result is effectively a whitespace-delimited series of 2D matrices. + + return detail::TensorView_WriteRank(out, tensor, Coord(), 0, out.width()); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/tools/util/type_traits.h b/tools/util/type_traits.h index 995cf96fc..50d478cc2 100644 --- a/tools/util/type_traits.h +++ b/tools/util/type_traits.h @@ -33,12 +33,52 @@ #include #include "half.h" +#include "cutlass/vector.h" +#include "cutlass/util/complex.h" namespace cutlass { struct half_t; template -struct TypeTraits; +struct TypeTraits { + typedef T host_type; + typedef T device_type; + static inline T remove_negative_zero(T x) { return x; } + static inline T to_print(T x) { return x; } +}; + +template <> +struct TypeTraits > { + static cudaDataType_t const cublas_type = CUDA_R_32I; + typedef Vector host_type; + typedef Vector device_type; + typedef uint32_t integer_type; + typedef uint32_t unsigned_type; + static inline uint32_t remove_negative_zero(uint32_t x) { return x; } + static inline uint32_t to_print(uint32_t x) { return x; } +}; + +template <> +struct TypeTraits< Vector > { + static 
cudaDataType_t const cublas_type = CUDA_R_32I; + typedef Vector host_type; + typedef Vector device_type; + typedef uint32_t integer_type; + typedef uint32_t unsigned_type; + static inline uint32_t remove_negative_zero(uint32_t x) { return x; } + static inline uint32_t to_print(uint32_t x) { return x; } +}; + +template <> +struct TypeTraits< Vector > { + static cudaDataType_t const cublas_type = CUDA_R_32I; + typedef Vector host_type; + typedef Vector device_type; + typedef uint32_t integer_type; + typedef uint32_t unsigned_type; + static inline uint32_t remove_negative_zero(uint32_t x) { return x; } + static inline uint32_t to_print(uint32_t x) { return x; } +}; template <> struct TypeTraits { @@ -158,4 +198,73 @@ struct TypeTraits { static inline double remove_negative_zero(double x) { return x == -0.0 ? 0.0 : x; } static inline double to_print(double x) { return x; } }; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Complex types +// +/////////////////////////////////////////////////////////////////////////////////////////////////// + +template <> +struct TypeTraits > { + static cudaDataType_t const cublas_type = CUDA_C_16F; + typedef platform::complex host_type; + typedef platform::complex device_type; + typedef int16_t integer_type; + typedef uint16_t unsigned_type; +}; + +template <> +struct TypeTraits > { + static cudaDataType_t const cublas_type = CUDA_C_16F; + typedef platform::complex host_type; + typedef platform::complex device_type; + typedef int16_t integer_type; + typedef uint16_t unsigned_type; + static inline platform::complex remove_negative_zero(platform::complex x) { + return platform::complex( + real(x) == -0.f ? half_t(0) : real(x), + imag(x) == -0.f ? 
half_t(0) : imag(x) + ); + } + static inline platform::complex to_print(platform::complex x) { return x; } +}; + +template <> +struct TypeTraits > { + + static cudaDataType_t const cublas_type = CUDA_C_32F; + typedef platform::complex host_type; + typedef platform::complex device_type; + typedef int64_t integer_type; + typedef uint64_t unsigned_type; + + static inline platform::complex remove_negative_zero(platform::complex x) { + return platform::complex( + real(x) == -0.f ? 0.f : real(x), + imag(x) == -0.f ? 0.f : imag(x) + ); + } + + static inline platform::complex to_print(platform::complex x) { return x; } +}; + +template <> +struct TypeTraits > { + static cudaDataType_t const cublas_type = CUDA_C_64F; + typedef platform::complex host_type; + typedef platform::complex device_type; + struct integer_type { int64_t real, imag; }; + struct unsigned_type { uint64_t real, imag; }; + static inline platform::complex remove_negative_zero(platform::complex x) { + return platform::complex( + real(x) == -0.0 ? 0.0 : real(x), + imag(x) == -0.0 ? 
0.0 : imag(x) + ); + } + static inline platform::complex to_print(platform::complex x) { return x; } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + } // namespace cutlass From d7137f9c0a1633b76455109373887e1640713b5d Mon Sep 17 00:00:00 2001 From: akerr Date: Wed, 19 Sep 2018 14:02:08 -0700 Subject: [PATCH 2/4] Updated doxygen --- docs/annotated.html | 649 ++++---- ...dicateVector_1_1ConstIterator-members.html | 2 +- ...s_1_1PredicateVector_1_1ConstIterator.html | 2 +- ..._1PredicateVector_1_1Iterator-members.html | 2 +- ...utlass_1_1PredicateVector_1_1Iterator.html | 2 +- docs/classcutlass_1_1TensorRef-members.html | 61 +- docs/classcutlass_1_1TensorRef.html | 1318 ++++++++++------ docs/classcutlass_1_1TensorRef.png | Bin 0 -> 2451 bytes ...nsorRefArray_1_1ConstIterator-members.html | 101 ++ ...ss_1_1TensorRefArray_1_1ConstIterator.html | 440 ++++++ ...BatchStrided_1_1ConstIterator-members.html | 102 ++ ...ensorRefBatchStrided_1_1ConstIterator.html | 476 ++++++ ...Index___00_01LongIndex___01_4-members.html | 124 ++ ...11_00_01Index___00_01LongIndex___01_4.html | 1092 +++++++++++++ docs/classcutlass_1_1TensorView-members.html | 95 +- docs/classcutlass_1_1TensorView.html | 1289 +++++++++------- docs/classcutlass_1_1TensorView.png | Bin 690 -> 1605 bytes ...asscutlass_1_1ZipTileIterator-members.html | 125 ++ docs/classcutlass_1_1ZipTileIterator.html | 1290 ++++++++++++++++ ..._1_1detail_1_1ScalarOrPointer-members.html | 101 ++ ...scutlass_1_1detail_1_1ScalarOrPointer.html | 434 ++++++ ...earScalingDevicePtr_1_1Params-members.html | 98 ++ ...m_1_1LinearScalingDevicePtr_1_1Params.html | 389 +++++ ...utlass_1_1platform_1_1complex-members.html | 100 ++ docs/classcutlass_1_1platform_1_1complex.html | 413 +++++ ...ss_1_1platform_1_1unique__ptr-members.html | 2 +- ...asscutlass_1_1platform_1_1unique__ptr.html | 2 +- docs/classes.html | 174 ++- docs/clear__accumulators_8h.html | 4 +- 
docs/clear__accumulators_8h_source.html | 7 +- docs/complex_8h.html | 263 ++++ docs/complex_8h_source.html | 123 ++ docs/convert_8h.html | 4 +- docs/convert_8h_source.html | 4 +- docs/coord_8h.html | 22 +- docs/coord_8h_source.html | 80 +- docs/core__io_8h.html | 79 +- docs/core__io_8h_source.html | 12 +- docs/cutlass_8h.html | 64 +- docs/cutlass_8h_source.html | 7 +- docs/cutlass__math_8h.html | 14 +- docs/cutlass__math_8h_source.html | 14 +- docs/debug_8h.html | 2 +- docs/debug_8h_source.html | 2 +- docs/dgemm__traits_8h.html | 20 +- docs/dgemm__traits_8h_source.html | 16 +- .../dir_1417ee5ebebc309c36b7962f26a92c39.html | 29 +- .../dir_18d6a367a3982a494d65599933fc67a3.html | 32 +- .../dir_c5917a9a879e9a6c73eaf5237444ab84.html | 6 +- docs/files.html | 119 +- docs/fp16__sgemm__multiply__add_8h.html | 111 ++ .../fp16__sgemm__multiply__add_8h_source.html | 107 ++ docs/fp16__sgemm__traits_8h.html | 117 ++ docs/fp16__sgemm__traits_8h_source.html | 104 ++ docs/fragment_8h.html | 10 +- docs/fragment_8h_source.html | 82 +- docs/fragment__multiply__add_8h.html | 8 +- docs/fragment__multiply__add_8h_source.html | 30 +- docs/functions.html | 104 +- docs/functions_0x7e.html | 2 +- docs/functions_b.html | 68 +- docs/functions_c.html | 91 +- docs/functions_d.html | 60 +- docs/functions_e.html | 12 +- docs/functions_enum.html | 8 +- docs/functions_eval.html | 40 +- docs/functions_f.html | 132 +- docs/functions_func.html | 27 +- docs/functions_func_0x7e.html | 2 +- docs/functions_func_b.html | 10 +- docs/functions_func_c.html | 65 +- docs/functions_func_d.html | 24 +- docs/functions_func_e.html | 8 +- docs/functions_func_f.html | 19 +- docs/functions_func_g.html | 60 +- docs/functions_func_h.html | 11 +- docs/functions_func_i.html | 103 +- docs/functions_func_k.html | 98 ++ docs/functions_func_l.html | 50 +- docs/functions_func_m.html | 31 +- docs/functions_func_n.html | 89 ++ docs/functions_func_o.html | 151 +- docs/functions_func_p.html | 22 +- docs/functions_func_r.html | 35 
+- docs/functions_func_s.html | 68 +- docs/functions_func_t.html | 40 +- docs/functions_func_u.html | 2 +- docs/functions_func_v.html | 18 +- docs/functions_func_w.html | 7 +- docs/functions_func_z.html | 95 ++ docs/functions_g.html | 133 +- docs/functions_h.html | 11 +- docs/functions_i.html | 214 +-- docs/functions_k.html | 203 ++- docs/functions_l.html | 77 +- docs/functions_m.html | 53 +- docs/functions_n.html | 13 +- docs/functions_o.html | 160 +- docs/functions_p.html | 89 +- docs/functions_r.html | 43 +- docs/functions_s.html | 374 +++-- docs/functions_t.html | 194 ++- docs/functions_type.html | 69 +- docs/functions_type_b.html | 35 +- docs/functions_type_c.html | 25 +- docs/functions_type_d.html | 24 +- docs/functions_type_e.html | 6 +- docs/functions_type_f.html | 64 +- docs/functions_type_g.html | 67 +- docs/functions_type_i.html | 75 +- docs/functions_type_k.html | 86 ++ docs/functions_type_l.html | 10 +- docs/functions_type_m.html | 15 +- docs/functions_type_n.html | 2 +- docs/functions_type_o.html | 9 +- docs/functions_type_p.html | 23 +- docs/functions_type_s.html | 211 +-- docs/functions_type_t.html | 117 +- docs/functions_type_v.html | 8 +- docs/functions_type_w.html | 4 +- docs/functions_type_y.html | 2 +- docs/functions_u.html | 2 +- docs/functions_v.html | 27 +- docs/functions_vars.html | 12 +- docs/functions_vars_b.html | 26 +- docs/functions_vars_c.html | 5 +- docs/functions_vars_d.html | 16 +- docs/functions_vars_e.html | 2 +- docs/functions_vars_f.html | 23 +- docs/functions_vars_g.html | 14 +- docs/functions_vars_i.html | 22 +- docs/functions_vars_k.html | 157 +- docs/functions_vars_l.html | 21 +- docs/functions_vars_m.html | 13 +- docs/functions_vars_n.html | 9 +- docs/functions_vars_o.html | 86 ++ docs/functions_vars_p.html | 28 +- docs/functions_vars_r.html | 10 +- docs/functions_vars_s.html | 73 +- docs/functions_vars_t.html | 29 +- docs/functions_vars_v.html | 3 +- docs/functions_w.html | 9 +- docs/functions_y.html | 2 +- 
docs/functions_z.html | 95 ++ docs/gemm_8h.html | 26 +- docs/gemm_8h_source.html | 74 +- docs/gemm__config_8h.html | 109 ++ docs/gemm__config_8h_source.html | 123 ++ docs/gemm__coord_8h.html | 111 ++ docs/gemm__coord_8h_source.html | 131 ++ docs/gemm__desc_8h.html | 111 ++ docs/gemm__desc_8h_source.html | 121 ++ docs/gemm__epilogue_8h.html | 19 +- docs/gemm__epilogue_8h_source.html | 73 +- docs/gemm__epilogue__traits_8h.html | 24 +- docs/gemm__epilogue__traits_8h_source.html | 121 +- docs/gemm__global__stream_8h.html | 20 +- docs/gemm__global__stream_8h_source.html | 93 +- docs/gemm__global__tile_8h.html | 16 +- docs/gemm__global__tile_8h_source.html | 235 +-- docs/gemm__operand_8h.html | 8 +- docs/gemm__operand_8h_source.html | 24 +- docs/gemm__shared__stream_8h.html | 5 +- docs/gemm__shared__stream_8h_source.html | 53 +- docs/gemm__shared__tile_8h.html | 4 +- docs/gemm__shared__tile_8h_source.html | 181 +-- docs/gemm__stream__pair_8h.html | 132 ++ docs/gemm__stream__pair_8h_source.html | 142 ++ docs/gemm__traits_8h.html | 38 +- docs/gemm__traits_8h_source.html | 267 ++-- docs/globals.html | 11 +- docs/globals_defs.html | 5 +- docs/globals_func.html | 6 +- docs/group__IdentityBlockSwizzle.html | 84 + docs/group__fragment__concept.html | 5 +- docs/group__fragment__iterator__concept.html | 2 +- docs/group__layout__concept.html | 2 +- docs/group__predicate__iterator__concept.html | 2 +- docs/group__predicate__tile__adapter.html | 2 +- docs/group__predicate__vector__concept.html | 2 +- .../group__tile__load__iterator__concept.html | 4 +- ...group__tile__store__iterator__concept.html | 4 +- docs/group__tile__traits__concept.html | 4 +- docs/hgemm__global__tile_8h.html | 10 +- docs/hgemm__global__tile_8h_source.html | 16 +- docs/hgemm__multiply__add_8h.html | 10 +- docs/hgemm__multiply__add_8h_source.html | 31 +- docs/hgemm__swizzle_8h.html | 4 +- docs/hgemm__swizzle_8h_source.html | 8 +- docs/hgemm__traits_8h.html | 30 +- docs/hgemm__traits_8h_source.html | 122 +- 
docs/hierarchy.html | 685 +++++---- docs/igemm__epilogue_8h.html | 16 +- docs/igemm__epilogue_8h_source.html | 50 +- docs/igemm__global__tile_8h.html | 16 +- docs/igemm__global__tile_8h_source.html | 41 +- docs/igemm__multiply__add_8h.html | 10 +- docs/igemm__multiply__add_8h_source.html | 31 +- docs/igemm__swizzle_8h.html | 4 +- docs/igemm__swizzle_8h_source.html | 8 +- docs/igemm__traits_8h.html | 46 +- docs/igemm__traits_8h_source.html | 141 +- docs/index.html | 2 +- docs/iterator__access_8h.html | 70 +- docs/iterator__access_8h_source.html | 20 +- docs/kernel__launch_8h.html | 108 ++ docs/kernel__launch_8h_source.html | 95 ++ docs/linear__scaling_8h.html | 15 +- docs/linear__scaling_8h_source.html | 36 +- docs/linear__scaling__device__ptr_8h.html | 114 ++ ...inear__scaling__device__ptr_8h_source.html | 109 ++ docs/load__store_8h.html | 42 +- docs/load__store_8h_source.html | 83 +- docs/matrix__traits_8h.html | 28 +- docs/matrix__traits_8h_source.html | 91 +- docs/menudata.js | 17 + docs/modules.html | 17 +- docs/namespacecutlass.html | 1356 ++++++++--------- docs/namespacecutlass_1_1detail.html | 95 ++ docs/namespacecutlass_1_1gemm.html | 144 +- docs/namespacecutlass_1_1platform.html | 1185 +++++++++++++- docs/namespacemembers.html | 146 +- docs/namespacemembers_func.html | 144 +- docs/namespacemembers_type.html | 2 +- docs/namespaces.html | 7 +- docs/numeric__types_8h.html | 107 ++ docs/numeric__types_8h_source.html | 91 ++ docs/platform_8h.html | 4 +- docs/platform_8h_source.html | 166 +- docs/predicate__vector_8h.html | 11 +- docs/predicate__vector_8h_source.html | 128 +- docs/reshape__tile_8h.html | 4 +- docs/reshape__tile_8h_source.html | 6 +- docs/scalar__or__pointer_8h.html | 109 ++ docs/scalar__or__pointer_8h_source.html | 101 ++ docs/search/all_0.js | 1 + docs/search/all_1.js | 20 +- docs/search/all_10.js | 21 +- docs/search/all_11.js | 130 +- docs/search/all_12.js | 77 +- docs/search/all_13.js | 1 + docs/search/all_14.js | 17 +- docs/search/all_15.js 
| 5 +- docs/search/all_17.js | 9 +- docs/search/all_18.html | 30 + docs/search/all_18.js | 4 + docs/search/all_2.js | 19 +- docs/search/all_3.js | 55 +- docs/search/all_4.js | 21 +- docs/search/all_5.js | 7 +- docs/search/all_6.js | 45 +- docs/search/all_7.js | 87 +- docs/search/all_8.js | 5 +- docs/search/all_9.js | 71 +- docs/search/all_a.js | 93 +- docs/search/all_b.js | 40 +- docs/search/all_c.js | 27 +- docs/search/all_d.js | 7 +- docs/search/all_e.js | 53 +- docs/search/all_f.js | 22 +- docs/search/classes_1.js | 1 + docs/search/classes_10.js | 33 +- docs/search/classes_11.js | 9 +- docs/search/classes_12.js | 15 +- docs/search/classes_13.html | 30 + docs/search/classes_13.js | 5 + docs/search/classes_14.html | 30 + docs/search/classes_14.js | 8 + docs/search/classes_2.js | 18 +- docs/search/classes_3.js | 5 +- docs/search/classes_5.js | 11 +- docs/search/classes_6.js | 20 +- docs/search/classes_8.js | 17 +- docs/search/classes_9.js | 12 +- docs/search/classes_a.js | 22 +- docs/search/classes_b.js | 8 +- docs/search/classes_c.js | 13 +- docs/search/classes_d.js | 23 +- docs/search/classes_e.js | 45 +- docs/search/classes_f.js | 62 +- docs/search/defines_1.js | 1 + docs/search/enums_0.js | 2 +- docs/search/enumvalues_1.js | 17 +- docs/search/enumvalues_2.js | 19 +- docs/search/enumvalues_3.js | 2 +- docs/search/enumvalues_4.html | 30 + docs/search/enumvalues_4.js | 4 + docs/search/enumvalues_5.html | 30 + docs/search/enumvalues_5.js | 4 + docs/search/files_0.js | 1 + docs/search/files_10.html | 30 + docs/search/files_10.js | 6 + docs/search/files_2.js | 3 +- docs/search/files_3.js | 4 + docs/search/files_5.js | 1 - docs/search/files_6.js | 3 +- docs/search/files_7.js | 4 +- docs/search/files_8.js | 3 +- docs/search/files_9.js | 2 +- docs/search/files_a.js | 4 +- docs/search/files_b.js | 6 +- docs/search/files_c.js | 4 +- docs/search/files_d.js | 16 +- docs/search/files_e.js | 7 +- docs/search/files_f.html | 30 + docs/search/files_f.js | 9 + 
docs/search/functions_0.js | 3 +- docs/search/functions_1.js | 7 +- docs/search/functions_10.js | 19 +- docs/search/functions_11.js | 21 +- docs/search/functions_12.js | 14 +- docs/search/functions_13.js | 2 +- docs/search/functions_14.js | 3 +- docs/search/functions_15.html | 30 + docs/search/functions_15.js | 5 + docs/search/functions_16.html | 30 + docs/search/functions_16.js | 7 + docs/search/functions_17.html | 30 + docs/search/functions_17.js | 4 + docs/search/functions_2.js | 4 +- docs/search/functions_3.js | 31 +- docs/search/functions_4.js | 8 +- docs/search/functions_5.js | 7 +- docs/search/functions_6.js | 12 +- docs/search/functions_7.js | 29 +- docs/search/functions_8.js | 5 +- docs/search/functions_9.js | 31 +- docs/search/functions_a.js | 11 +- docs/search/functions_b.js | 17 +- docs/search/functions_c.js | 43 +- docs/search/functions_d.js | 6 +- docs/search/functions_e.js | 38 +- docs/search/functions_f.js | 22 +- docs/search/groups_1.js | 2 +- docs/search/groups_2.js | 4 +- docs/search/groups_3.js | 6 +- docs/search/groups_4.html | 30 + docs/search/groups_4.js | 6 + docs/search/namespaces_0.js | 1 + docs/search/searchdata.js | 16 +- docs/search/typedefs_0.js | 9 +- docs/search/typedefs_1.js | 7 +- docs/search/typedefs_10.js | 4 +- docs/search/typedefs_11.js | 2 +- docs/search/typedefs_12.html | 30 + docs/search/typedefs_12.js | 4 + docs/search/typedefs_2.js | 9 +- docs/search/typedefs_3.js | 3 +- docs/search/typedefs_4.js | 2 +- docs/search/typedefs_5.js | 19 +- docs/search/typedefs_6.js | 32 +- docs/search/typedefs_7.js | 14 +- docs/search/typedefs_8.js | 2 +- docs/search/typedefs_9.js | 5 +- docs/search/typedefs_a.js | 5 +- docs/search/typedefs_b.js | 4 +- docs/search/typedefs_c.js | 6 +- docs/search/typedefs_d.js | 32 +- docs/search/typedefs_e.js | 52 +- docs/search/typedefs_f.js | 29 +- docs/search/variables_0.js | 5 +- docs/search/variables_1.js | 9 +- docs/search/variables_10.js | 9 +- docs/search/variables_11.html | 30 + 
docs/search/variables_11.js | 5 + docs/search/variables_2.js | 1 + docs/search/variables_3.js | 6 +- docs/search/variables_5.js | 8 +- docs/search/variables_6.js | 5 +- docs/search/variables_7.js | 16 +- docs/search/variables_8.js | 74 +- docs/search/variables_9.js | 8 +- docs/search/variables_a.js | 5 +- docs/search/variables_b.js | 2 +- docs/search/variables_c.js | 7 +- docs/search/variables_d.js | 10 +- docs/search/variables_e.js | 18 +- docs/search/variables_f.js | 22 +- docs/sgemm__traits_8h.html | 23 +- docs/sgemm__traits_8h_source.html | 19 +- docs/shape_8h.html | 20 +- docs/shape_8h_source.html | 23 +- docs/structDebugType.html | 88 ++ docs/structDebugValue.html | 88 ++ docs/structcutlass_1_1AlignedStruct.html | 2 +- ...ass_1_1ComputeOffsetFromShape-members.html | 4 +- ...ructcutlass_1_1ComputeOffsetFromShape.html | 12 +- ...s_1_1ComputeOffsetFromStrides-members.html | 4 +- ...ctcutlass_1_1ComputeOffsetFromStrides.html | 12 +- ...omputeThreadOffsetFromStrides-members.html | 2 +- ...ass_1_1ComputeThreadOffsetFromStrides.html | 2 +- ...011_03ed682791cf043da79a7cc93228a8c85.html | 2 +- ...011_0e75281d7e02fa191f5d498e10e25dc1b.html | 2 +- ...01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html | 2 +- ...01T__f2e6d84a53db391977c787a65ed62aca.html | 2 +- ..._1_1ConstPredicateTileAdapter-members.html | 2 +- ...tcutlass_1_1ConstPredicateTileAdapter.html | 2 +- docs/structcutlass_1_1Convert.html | 2 +- ...3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html | 2 +- ...3_01Ofca5985d18bcb54bc1f49355f3cee121.html | 2 +- docs/structcutlass_1_1Coord-members.html | 62 +- docs/structcutlass_1_1Coord.html | 633 +++++--- docs/structcutlass_1_1Copy-members.html | 2 +- docs/structcutlass_1_1Copy.html | 2 +- docs/structcutlass_1_1DumpType.html | 92 ++ docs/structcutlass_1_1Extent-members.html | 2 +- docs/structcutlass_1_1Extent.html | 2 +- ...or_3_01T_00_01Lanes_01_4_01_4-members.html | 2 +- ...3_01Vector_3_01T_00_01Lanes_01_4_01_4.html | 2 +- ..._00_01Lanes_01_4_01const_01_4-members.html | 2 +- 
...or_3_01T_00_01Lanes_01_4_01const_01_4.html | 2 +- docs/structcutlass_1_1Fragment-members.html | 13 +- docs/structcutlass_1_1Fragment.html | 77 +- ...lass_1_1FragmentConstIterator-members.html | 14 +- ...tructcutlass_1_1FragmentConstIterator.html | 76 +- ...utlass_1_1FragmentElementType-members.html | 93 ++ .../structcutlass_1_1FragmentElementType.html | 125 ++ ...ctcutlass_1_1FragmentIterator-members.html | 16 +- docs/structcutlass_1_1FragmentIterator.html | 88 +- .../structcutlass_1_1GemmOperand-members.html | 2 +- docs/structcutlass_1_1GemmOperand.html | 2 +- docs/structcutlass_1_1Identity-members.html | 2 +- docs/structcutlass_1_1Identity.html | 2 +- ...lass_1_1IdentityTensorMapFunc-members.html | 92 ++ ...tructcutlass_1_1IdentityTensorMapFunc.html | 166 ++ ...uctcutlass_1_1IteratorAdvance-members.html | 2 +- docs/structcutlass_1_1IteratorAdvance.html | 2 +- ..._1_1KernelLaunchConfiguration-members.html | 94 ++ ...tcutlass_1_1KernelLaunchConfiguration.html | 213 +++ ...ctcutlass_1_1KernelLaunchConfiguration.png | Bin 0 -> 2229 bytes docs/structcutlass_1_1Load-members.html | 10 +- docs/structcutlass_1_1Load.html | 36 +- ...1_1kS1b28106546bd22002cd52e60197548ce.html | 171 +++ ...1_1kS3de71ba9ef724a37fb1cf315da8bf9b5.html | 171 +++ ...1_1kS840de374cd1e3ee3a4aa6a70a9fa83d1.html | 92 ++ ...1_1kS91ff65dd36f118bca4542df6128d6d99.html | 171 +++ ...1_1kS966cdf9ba449f056a92458bf87878053.html | 92 ++ ...1_1kSc8e65d97c8f615e98ac2e03f0bcd236e.html | 92 ++ ...1_1kSf1f63874ddc1302ed9e60e0478e1a8ad.html | 174 +++ ...1_1kSf2952bcb62d20c76c595eac4c59f7239.html | 92 ++ ...1_1kW847237836867f08e1121b00b7e44d8ae.html | 92 ++ ...1_1kWe58d0048e24352beeec002fd483c53b2.html | 171 +++ ..._01Fr16ad55dd7bca84e6a7fc608c26eee889.html | 92 ++ ..._01Fra240d9c28383cd3945277ec3a927c538.html | 171 +++ ...01Fra03475418f27732d726d18feb23feeed2.html | 92 ++ ...01Fra934529165fa8fecdd392b5302d25ef26.html | 171 +++ ..._01Fr23088b868c6eeec9377c46892553686f.html | 171 +++ 
..._01Frc8b0aa6ddd9b4317158c26574a6881de.html | 92 ++ ...0_01d9e675253ca19588f1ae4bd898579523f.html | 92 ++ ...00_01double_00_01kStride_00_0116_01_4.html | 171 +++ .../structcutlass_1_1MatrixCoord-members.html | 140 ++ docs/structcutlass_1_1MatrixCoord.html | 748 +++++++++ docs/structcutlass_1_1MatrixCoord.png | Bin 0 -> 640 bytes ...structcutlass_1_1MatrixLayout-members.html | 2 +- docs/structcutlass_1_1MatrixLayout.html | 25 +- ..._1MatrixLayout_1_1ColumnMajor-members.html | 92 ++ ...utlass_1_1MatrixLayout_1_1ColumnMajor.html | 161 ++ ...out_1_1ColumnMajorBlockLinear-members.html | 95 ++ ...atrixLayout_1_1ColumnMajorBlockLinear.html | 257 ++++ ...out_1_1ColumnMajorInterleaved-members.html | 94 ++ ...atrixLayout_1_1ColumnMajorInterleaved.html | 230 +++ ...rixLayout_1_1ContiguousLayout-members.html | 95 ++ ...s_1_1MatrixLayout_1_1ContiguousLayout.html | 254 +++ ...s_1_1MatrixLayout_1_1RowMajor-members.html | 92 ++ ...ctcutlass_1_1MatrixLayout_1_1RowMajor.html | 161 ++ ...Layout_1_1RowMajorBlockLinear-members.html | 95 ++ ..._1MatrixLayout_1_1RowMajorBlockLinear.html | 257 ++++ ...Layout_1_1RowMajorInterleaved-members.html | 94 ++ ..._1MatrixLayout_1_1RowMajorInterleaved.html | 230 +++ ...uctcutlass_1_1MatrixTransform-members.html | 93 ++ docs/structcutlass_1_1MatrixTransform.html | 126 ++ docs/structcutlass_1_1Max-members.html | 91 ++ docs/structcutlass_1_1Max.html | 126 ++ .../structcutlass_1_1MemorySpace-members.html | 2 +- docs/structcutlass_1_1MemorySpace.html | 2 +- docs/structcutlass_1_1Min-members.html | 91 ++ docs/structcutlass_1_1Min.html | 126 ++ ...tlass_1_1PredicateTileAdapter-members.html | 2 +- ...structcutlass_1_1PredicateTileAdapter.html | 2 +- ...uctcutlass_1_1PredicateVector-members.html | 2 +- docs/structcutlass_1_1PredicateVector.html | 2 +- ...cateVector_1_1TrivialIterator-members.html | 2 +- ...1_1PredicateVector_1_1TrivialIterator.html | 2 +- ...s_1_1PredicatedTileLoadStream-members.html | 111 ++ ...ctcutlass_1_1PredicatedTileLoadStream.html 
| 402 +++++ ...uctcutlass_1_1PredicatedTileLoadStream.png | Bin 0 -> 1349 bytes ..._1_1PredicatedTileStoreStream-members.html | 112 ++ ...tcutlass_1_1PredicatedTileStoreStream.html | 466 ++++++ ...ctcutlass_1_1PredicatedTileStoreStream.png | Bin 0 -> 1350 bytes ..._1RegularTilePredicateFunctor-members.html | 94 ++ ...utlass_1_1RegularTilePredicateFunctor.html | 223 +++ .../structcutlass_1_1ReshapeTile-members.html | 2 +- docs/structcutlass_1_1ReshapeTile.html | 2 +- ...1kAccessSize___00_01true_01_4-members.html | 2 +- ...e___00_01kAccessSize___00_01true_01_4.html | 2 +- docs/structcutlass_1_1ScalarIO-members.html | 93 ++ docs/structcutlass_1_1ScalarIO.html | 188 +++ docs/structcutlass_1_1Shape-members.html | 2 +- docs/structcutlass_1_1Shape.html | 2 +- docs/structcutlass_1_1ShapeAdd-members.html | 2 +- docs/structcutlass_1_1ShapeAdd.html | 2 +- docs/structcutlass_1_1ShapeCount-members.html | 2 +- docs/structcutlass_1_1ShapeCount.html | 2 +- docs/structcutlass_1_1ShapeDiv-members.html | 2 +- docs/structcutlass_1_1ShapeDiv.html | 2 +- ...uctcutlass_1_1ShapeDivCeiling-members.html | 91 ++ docs/structcutlass_1_1ShapeDivCeiling.html | 118 ++ docs/structcutlass_1_1ShapeMax-members.html | 2 +- docs/structcutlass_1_1ShapeMax.html | 2 +- docs/structcutlass_1_1ShapeMin-members.html | 2 +- docs/structcutlass_1_1ShapeMin.html | 2 +- docs/structcutlass_1_1ShapeMul-members.html | 2 +- docs/structcutlass_1_1ShapeMul.html | 2 +- docs/structcutlass_1_1ShapeScale-members.html | 2 +- docs/structcutlass_1_1ShapeScale.html | 2 +- ...structcutlass_1_1ShapeStrides-members.html | 8 +- docs/structcutlass_1_1ShapeStrides.html | 18 +- docs/structcutlass_1_1ShapeSub-members.html | 2 +- docs/structcutlass_1_1ShapeSub.html | 2 +- .../structcutlass_1_1StorageType-members.html | 8 +- docs/structcutlass_1_1StorageType.html | 18 +- ...ass_1_1StorageType_3_011_01_4-members.html | 2 +- ...ructcutlass_1_1StorageType_3_011_01_4.html | 2 +- ...ass_1_1StorageType_3_012_01_4-members.html | 2 +- 
...ructcutlass_1_1StorageType_3_012_01_4.html | 2 +- ...ass_1_1StorageType_3_014_01_4-members.html | 2 +- ...ructcutlass_1_1StorageType_3_014_01_4.html | 2 +- docs/structcutlass_1_1Store-members.html | 10 +- docs/structcutlass_1_1Store.html | 36 +- ..._1_1k004b304998a534d76357f834068909f8.html | 92 ++ ..._1_1k12f5c8a016a307e76de374322fc00a66.html | 92 ++ ..._1_1k220d5790f803f10840e2a92fb9a51dac.html | 171 +++ ..._1_1k28cc0b88a16efca73d258128312d2a7e.html | 171 +++ ..._1_1k40d038d4bce377843c21a56ebf97d011.html | 171 +++ ..._1_1k60eedca420c41e94fd40b41299967ef2.html | 92 ++ ..._1_1k775a1d27affec5236489735ed4503c92.html | 92 ++ ..._1_1k84da7dcd68ee74b8d2bdb67885b0ca56.html | 92 ++ ..._1_1kd9a7e85f80a21c504388612a60462417.html | 171 +++ ..._1_1ke6d73d34fa7b5254cf828804a19842e1.html | 171 +++ ...00_013d38935f41bf709e067932b9e042255a.html | 92 ++ ...00_0160391c6be5cb1d3f99e012a6a18e486d.html | 171 +++ ...ructcutlass_1_1TensorRefArray-members.html | 101 ++ docs/structcutlass_1_1TensorRefArray.html | 398 +++++ ...lass_1_1TensorRefBatchStrided-members.html | 134 ++ ...tructcutlass_1_1TensorRefBatchStrided.html | 537 +++++++ ...structcutlass_1_1TensorRefBatchStrided.png | Bin 0 -> 1810 bytes ...00_01LongIndex___01_4_1_1StrideVector.html | 97 ++ ...ructcutlass_1_1TileAllocation-members.html | 101 ++ docs/structcutlass_1_1TileAllocation.html | 366 +++++ docs/structcutlass_1_1TileCoord-members.html | 151 ++ docs/structcutlass_1_1TileCoord.html | 1127 ++++++++++++++ docs/structcutlass_1_1TileCoord.png | Bin 0 -> 720 bytes ...ctcutlass_1_1TileIteratorBase-members.html | 54 +- docs/structcutlass_1_1TileIteratorBase.html | 402 ++--- docs/structcutlass_1_1TileIteratorBase.png | Bin 2958 -> 2987 bytes ...1_1TileIteratorBase_1_1Params-members.html | 30 +- ...cutlass_1_1TileIteratorBase_1_1Params.html | 328 +++- ...tcutlass_1_1TileIteratorBase_1_1Params.png | Bin 4162 -> 4455 bytes ...ctcutlass_1_1TileLoadIterator-members.html | 102 +- docs/structcutlass_1_1TileLoadIterator.html | 1015 
+++++++----- docs/structcutlass_1_1TileLoadIterator.png | Bin 2304 -> 2306 bytes ...1_1TileLoadIterator_1_1Params-members.html | 44 +- ...cutlass_1_1TileLoadIterator_1_1Params.html | 499 ++++-- ...tcutlass_1_1TileLoadIterator_1_1Params.png | Bin 3293 -> 3444 bytes ...ructcutlass_1_1TileLoadStream-members.html | 106 ++ docs/structcutlass_1_1TileLoadStream.html | 525 +++++++ docs/structcutlass_1_1TileLoadStream.png | Bin 0 -> 1352 bytes ...s_1_1TileLoadStream_1_1Params-members.html | 93 ++ ...ctcutlass_1_1TileLoadStream_1_1Params.html | 188 +++ ..._1_1TileLoadStream_1_1PredicateVector.html | 95 ++ ...tcutlass_1_1TileStoreIterator-members.html | 103 +- docs/structcutlass_1_1TileStoreIterator.html | 1205 ++++++++++----- docs/structcutlass_1_1TileStoreIterator.png | Bin 2309 -> 2318 bytes ..._1TileStoreIterator_1_1Params-members.html | 43 +- ...utlass_1_1TileStoreIterator_1_1Params.html | 424 +++++- ...cutlass_1_1TileStoreIterator_1_1Params.png | Bin 2388 -> 2515 bytes ...uctcutlass_1_1TileStoreStream-members.html | 107 ++ docs/structcutlass_1_1TileStoreStream.html | 556 +++++++ docs/structcutlass_1_1TileStoreStream.png | Bin 0 -> 1351 bytes ..._1_1TileStoreStream_1_1Params-members.html | 93 ++ ...tcutlass_1_1TileStoreStream_1_1Params.html | 188 +++ ...1_1TileStoreStream_1_1PredicateVector.html | 95 ++ docs/structcutlass_1_1TileTraits-members.html | 16 +- docs/structcutlass_1_1TileTraits.html | 113 +- ..._1_1TileTraitsContiguousMajor-members.html | 2 +- ...tcutlass_1_1TileTraitsContiguousMajor.html | 2 +- ...cutlass_1_1TileTraitsStandard-members.html | 11 +- docs/structcutlass_1_1TileTraitsStandard.html | 29 +- ...lass_1_1TileTraitsStrideMajor-members.html | 2 +- ...tructcutlass_1_1TileTraitsStrideMajor.html | 2 +- ...cutlass_1_1TileTraitsWarpRake-members.html | 2 +- docs/structcutlass_1_1TileTraitsWarpRake.html | 2 +- ...raitsWarpRake_1_1ThreadOffset-members.html | 2 +- ...1_1TileTraitsWarpRake_1_1ThreadOffset.html | 2 +- ...tcutlass_1_1TiledThreadOffset-members.html | 
2 +- docs/structcutlass_1_1TiledThreadOffset.html | 2 +- ..._1TrivialPredicateTileAdapter-members.html | 2 +- ...utlass_1_1TrivialPredicateTileAdapter.html | 2 +- ...structcutlass_1_1VectorTraits-members.html | 2 +- docs/structcutlass_1_1VectorTraits.html | 2 +- ...or_3_01T_00_01Lanes_01_4_01_4-members.html | 2 +- ...3_01Vector_3_01T_00_01Lanes_01_4_01_4.html | 2 +- ..._00_01Lanes_01_4_01const_01_4-members.html | 2 +- ...or_3_01T_00_01Lanes_01_4_01const_01_4.html | 2 +- docs/structcutlass_1_1Vectorize-members.html | 2 +- docs/structcutlass_1_1Vectorize.html | 2 +- ..._0132_01_4_00_01kLanes___01_4-members.html | 91 ++ ...n1__t_00_0132_01_4_00_01kLanes___01_4.html | 118 ++ ...0_018_01_4_00_01kLanes___01_4-members.html | 91 ++ ...nt4__t_00_018_01_4_00_01kLanes___01_4.html | 118 ++ ...0_018_01_4_00_01kLanes___01_4-members.html | 91 ++ ...nt4__t_00_018_01_4_00_01kLanes___01_4.html | 118 ++ docs/structcutlass_1_1ZipConvert-members.html | 99 ++ docs/structcutlass_1_1ZipConvert.html | 340 +++++ .../structcutlass_1_1ZipFragment-members.html | 98 ++ docs/structcutlass_1_1ZipFragment.html | 310 ++++ ...structcutlass_1_1ZipTensorRef-members.html | 96 ++ docs/structcutlass_1_1ZipTensorRef.html | 255 ++++ ...tcutlass_1_1ZipTileAllocation-members.html | 98 ++ docs/structcutlass_1_1ZipTileAllocation.html | 287 ++++ ..._1_1ZipTileIterator_1_1Params-members.html | 94 ++ ...tcutlass_1_1ZipTileIterator_1_1Params.html | 217 +++ docs/structcutlass_1_1bin1__t.html | 92 ++ ...ructcutlass_1_1divide__assert-members.html | 2 +- docs/structcutlass_1_1divide__assert.html | 2 +- ..._1_1gemm_1_1ClearAccumulators-members.html | 3 +- ...tcutlass_1_1gemm_1_1ClearAccumulators.html | 34 +- ...1_1ClearAccumulators_1_1SharedStorage.html | 2 +- ...mm_1_1ColumnMajorBlockSwizzle-members.html | 95 ++ ...ss_1_1gemm_1_1ColumnMajorBlockSwizzle.html | 260 ++++ ...utlass_1_1gemm_1_1DgemmConfig-members.html | 59 +- .../structcutlass_1_1gemm_1_1DgemmConfig.html | 163 +- 
docs/structcutlass_1_1gemm_1_1DgemmConfig.png | Bin 2957 -> 3040 bytes ...utlass_1_1gemm_1_1DgemmTraits-members.html | 19 +- .../structcutlass_1_1gemm_1_1DgemmTraits.html | 37 +- docs/structcutlass_1_1gemm_1_1DgemmTraits.png | Bin 8483 -> 8474 bytes ...ss_1_1gemm_1_1Fp16SgemmConfig-members.html | 118 ++ ...uctcutlass_1_1gemm_1_1Fp16SgemmConfig.html | 186 +++ ...ructcutlass_1_1gemm_1_1Fp16SgemmConfig.png | Bin 0 -> 3199 bytes ...1gemm_1_1Fp16SgemmSgemmTraits-members.html | 114 ++ ...tlass_1_1gemm_1_1Fp16SgemmSgemmTraits.html | 182 +++ ...utlass_1_1gemm_1_1Fp16SgemmSgemmTraits.png | Bin 0 -> 8749 bytes ..._1gemm_1_1FragmentMultiplyAdd-members.html | 19 +- ...utlass_1_1gemm_1_1FragmentMultiplyAdd.html | 131 +- ...half_00_01half_00_01true_01_4-members.html | 96 ++ ...Add_3_01half_00_01half_00_01true_01_4.html | 287 ++++ ...structcutlass_1_1gemm_1_1Gemm-members.html | 12 +- docs/structcutlass_1_1gemm_1_1Gemm.html | 151 +- ...cutlass_1_1gemm_1_1GemmConfig-members.html | 59 +- docs/structcutlass_1_1gemm_1_1GemmConfig.html | 427 +++--- ...tcutlass_1_1gemm_1_1GemmCoord-members.html | 152 ++ docs/structcutlass_1_1gemm_1_1GemmCoord.html | 1102 ++++++++++++++ docs/structcutlass_1_1gemm_1_1GemmCoord.png | Bin 0 -> 691 bytes ...ctcutlass_1_1gemm_1_1GemmDesc-members.html | 44 +- docs/structcutlass_1_1gemm_1_1GemmDesc.html | 669 ++++++-- ...tlass_1_1gemm_1_1GemmEpilogue-members.html | 41 +- ...structcutlass_1_1gemm_1_1GemmEpilogue.html | 147 +- ...1_1gemm_1_1GemmEpilogueTraits-members.html | 38 +- ...cutlass_1_1gemm_1_1GemmEpilogueTraits.html | 228 +-- ...m_1_1GemmEpilogueTraitsHelper-members.html | 11 +- ...s_1_1gemm_1_1GemmEpilogueTraitsHelper.html | 65 +- ...1GemmEpilogueTraits_1_1Params-members.html | 22 +- ...1gemm_1_1GemmEpilogueTraits_1_1Params.html | 118 +- ...ilogueTraits_1_1SharedStorage-members.html | 9 +- ..._1GemmEpilogueTraits_1_1SharedStorage.html | 54 +- ...1gemm_1_1GemmGlobalIteratorAb-members.html | 109 +- ...tlass_1_1gemm_1_1GemmGlobalIteratorAb.html | 723 
++++++--- ...utlass_1_1gemm_1_1GemmGlobalIteratorAb.png | Bin 3492 -> 4486 bytes ...emmGlobalIteratorAb_1_1Params-members.html | 42 +- ...emm_1_1GemmGlobalIteratorAb_1_1Params.html | 135 +- ...gemm_1_1GemmGlobalIteratorAb_1_1Params.png | Bin 3308 -> 3459 bytes ...1gemm_1_1GemmGlobalIteratorCd-members.html | 72 +- ...tlass_1_1gemm_1_1GemmGlobalIteratorCd.html | 531 +++++-- ...utlass_1_1gemm_1_1GemmGlobalIteratorCd.png | Bin 1734 -> 2546 bytes ...emmGlobalIteratorCd_1_1Params-members.html | 7 +- ...emm_1_1GemmGlobalIteratorCd_1_1Params.html | 48 +- ...gemm_1_1GemmGlobalIteratorCd_1_1Params.png | Bin 0 -> 1464 bytes ...emm_1_1GemmGlobalTileCdTraits-members.html | 5 +- ...ass_1_1gemm_1_1GemmGlobalTileCdTraits.html | 43 +- ...lTileCdTraits_1_1ThreadOffset-members.html | 2 +- ...emmGlobalTileCdTraits_1_1ThreadOffset.html | 2 +- ...1gemm_1_1GemmGlobalTileTraits-members.html | 11 +- ...tlass_1_1gemm_1_1GemmGlobalTileTraits.html | 75 +- ...utlass_1_1gemm_1_1GemmGlobalTileTraits.png | Bin 2856 -> 2833 bytes ...balTileTraits_1_1ThreadOffset-members.html | 2 +- ...1GemmGlobalTileTraits_1_1ThreadOffset.html | 2 +- ...emm_1_1GemmMultiplicandTraits-members.html | 2 +- ...ass_1_1gemm_1_1GemmMultiplicandTraits.html | 2 +- ..._1gemm_1_1GemmOperandTraitsAb-members.html | 2 +- ...utlass_1_1gemm_1_1GemmOperandTraitsAb.html | 2 +- ..._1_1GemmSharedLoadTileATraits-members.html | 6 +- ..._1_1gemm_1_1GemmSharedLoadTileATraits.html | 25 +- ...adTileATraits_1_1ThreadOffset-members.html | 2 +- ...SharedLoadTileATraits_1_1ThreadOffset.html | 2 +- ..._1_1GemmSharedLoadTileBTraits-members.html | 6 +- ..._1_1gemm_1_1GemmSharedLoadTileBTraits.html | 25 +- ...adTileBTraits_1_1ThreadOffset-members.html | 2 +- ...SharedLoadTileBTraits_1_1ThreadOffset.html | 2 +- ..._1_1GemmSharedLoadTileDTraits-members.html | 23 +- ..._1_1gemm_1_1GemmSharedLoadTileDTraits.html | 66 +- ...adTileDTraits_1_1ThreadOffset-members.html | 2 +- ...SharedLoadTileDTraits_1_1ThreadOffset.html | 2 +- 
..._1GemmSharedStoreTileAbTraits-members.html | 2 +- ..._1gemm_1_1GemmSharedStoreTileAbTraits.html | 2 +- ...eTileAbTraits_1_1ThreadOffset-members.html | 2 +- ...aredStoreTileAbTraits_1_1ThreadOffset.html | 2 +- ...1_1GemmSharedStoreTileDTraits-members.html | 2 +- ...1_1gemm_1_1GemmSharedStoreTileDTraits.html | 2 +- ...reTileDTraits_1_1ThreadOffset-members.html | 2 +- ...haredStoreTileDTraits_1_1ThreadOffset.html | 2 +- ...aredStoreWithSkewTileAbTraits-members.html | 2 +- ..._1GemmSharedStoreWithSkewTileAbTraits.html | 2 +- ...wTileAbTraits_1_1ThreadOffset-members.html | 2 +- ...eWithSkewTileAbTraits_1_1ThreadOffset.html | 2 +- ...lass_1_1gemm_1_1GemmTileTraitsHelperA.html | 2 +- ...nMajor_00_01GemmConfig___01_4-members.html | 2 +- ..._1kColumnMajor_00_01GemmConfig___01_4.html | 8 +- ...1_1kColumnMajor_00_01GemmConfig___01_4.png | Bin 1679 -> 1846 bytes ...wMajor_00_01GemmConfig___01_4-members.html | 9 +- ...t_1_1kRowMajor_00_01GemmConfig___01_4.html | 67 +- ...lass_1_1gemm_1_1GemmTileTraitsHelperB.html | 2 +- ...nMajor_00_01GemmConfig___01_4-members.html | 9 +- ..._1kColumnMajor_00_01GemmConfig___01_4.html | 67 +- ...wMajor_00_01GemmConfig___01_4-members.html | 2 +- ...t_1_1kRowMajor_00_01GemmConfig___01_4.html | 8 +- ...ut_1_1kRowMajor_00_01GemmConfig___01_4.png | Bin 1637 -> 1731 bytes ...cutlass_1_1gemm_1_1GemmTraits-members.html | 15 +- docs/structcutlass_1_1gemm_1_1GemmTraits.html | 143 +- ...aits_1_1MainLoopSharedStorage-members.html | 6 +- ..._1GemmTraits_1_1MainLoopSharedStorage.html | 31 +- ...1gemm_1_1GemmTraits_1_1Params-members.html | 18 +- ...tlass_1_1gemm_1_1GemmTraits_1_1Params.html | 393 +++-- ...utlass_1_1gemm_1_1GemmTraits_1_1Params.png | Bin 0 -> 2212 bytes docs/structcutlass_1_1gemm_1_1GetExtent.html | 2 +- ...perand_1_1kA_00_01Tile___01_4-members.html | 2 +- ..._01GemmOperand_1_1kA_00_01Tile___01_4.html | 2 +- ...perand_1_1kB_00_01Tile___01_4-members.html | 2 +- ..._01GemmOperand_1_1kB_00_01Tile___01_4.html | 2 +- 
...s_1_1gemm_1_1GlobalLoadStream-members.html | 58 +- ...ctcutlass_1_1gemm_1_1GlobalLoadStream.html | 809 ++++++++-- ...1gemm_1_1GlobalLoadStreamPair-members.html | 104 ++ ...tlass_1_1gemm_1_1GlobalLoadStreamPair.html | 519 +++++++ ...lobalLoadStreamPair_1_1Params-members.html | 94 ++ ...emm_1_1GlobalLoadStreamPair_1_1Params.html | 217 +++ ...adStreamPair_1_1SharedStorage-members.html | 92 ++ ...GlobalLoadStreamPair_1_1SharedStorage.html | 139 ++ ...1_1GlobalLoadStream_1_1Params-members.html | 94 ++ ...1_1gemm_1_1GlobalLoadStream_1_1Params.html | 215 +++ ..._1_1GlobalLoadStream_1_1SharedStorage.html | 97 ++ ...utlass_1_1gemm_1_1HgemmConfig-members.html | 59 +- .../structcutlass_1_1gemm_1_1HgemmConfig.html | 163 +- docs/structcutlass_1_1gemm_1_1HgemmConfig.png | Bin 2911 -> 2958 bytes ...gemmCrosswiseGlobalTileTraits-members.html | 9 +- ...emm_1_1HgemmCrosswiseGlobalTileTraits.html | 59 +- ...balTileTraits_1_1ThreadOffset-members.html | 2 +- ...swiseGlobalTileTraits_1_1ThreadOffset.html | 2 +- ...tlass_1_1gemm_1_1HgemmSwizzle-members.html | 2 +- ...structcutlass_1_1gemm_1_1HgemmSwizzle.html | 2 +- ...ass_1_1gemm_1_1HgemmTileTraitsHelperA.html | 2 +- ...wMajor_00_01GemmConfig___01_4-members.html | 9 +- ...t_1_1kRowMajor_00_01GemmConfig___01_4.html | 55 +- ...ass_1_1gemm_1_1HgemmTileTraitsHelperB.html | 2 +- ...nMajor_00_01GemmConfig___01_4-members.html | 9 +- ..._1kColumnMajor_00_01GemmConfig___01_4.html | 55 +- ...utlass_1_1gemm_1_1HgemmTraits-members.html | 19 +- .../structcutlass_1_1gemm_1_1HgemmTraits.html | 37 +- docs/structcutlass_1_1gemm_1_1HgemmTraits.png | Bin 3190 -> 3169 bytes ..._1_1gemm_1_1HgemmTraitsHelper-members.html | 44 +- ...tcutlass_1_1gemm_1_1HgemmTraitsHelper.html | 270 ++-- ...tcutlass_1_1gemm_1_1HgemmTransformerA.html | 2 +- ...umnMajor_00_01Iterator___01_4-members.html | 2 +- ..._1_1kColumnMajor_00_01Iterator___01_4.html | 2 +- ...RowMajor_00_01Iterator___01_4-members.html | 2 +- ...out_1_1kRowMajor_00_01Iterator___01_4.html | 2 +- 
...tcutlass_1_1gemm_1_1HgemmTransformerB.html | 2 +- ...umnMajor_00_01Iterator___01_4-members.html | 2 +- ..._1_1kColumnMajor_00_01Iterator___01_4.html | 2 +- ...RowMajor_00_01Iterator___01_4-members.html | 2 +- ...out_1_1kRowMajor_00_01Iterator___01_4.html | 2 +- ...1gemm_1_1IdentityBlockSwizzle-members.html | 9 +- ...tlass_1_1gemm_1_1IdentityBlockSwizzle.html | 111 +- ...utlass_1_1gemm_1_1IgemmConfig-members.html | 59 +- .../structcutlass_1_1gemm_1_1IgemmConfig.html | 163 +- docs/structcutlass_1_1gemm_1_1IgemmConfig.png | Bin 2603 -> 2581 bytes ...t_00_01ThreadGemmShape___01_4-members.html | 118 ++ ...01int8__t_00_01ThreadGemmShape___01_4.html | 186 +++ ..._01int8__t_00_01ThreadGemmShape___01_4.png | Bin 0 -> 2442 bytes ...lass_1_1gemm_1_1IgemmEpilogue-members.html | 43 +- ...tructcutlass_1_1gemm_1_1IgemmEpilogue.html | 59 +- ..._1gemm_1_1IgemmEpilogueScalar-members.html | 2 +- ...utlass_1_1gemm_1_1IgemmEpilogueScalar.html | 2 +- ...mmEpilogueScalar_3_01int_01_4-members.html | 2 +- ...m_1_1IgemmEpilogueScalar_3_01int_01_4.html | 2 +- ..._1gemm_1_1IgemmEpilogueTraits-members.html | 34 +- ...utlass_1_1gemm_1_1IgemmEpilogueTraits.html | 100 +- ...cutlass_1_1gemm_1_1IgemmEpilogueTraits.png | Bin 3581 -> 3632 bytes ..._1_1IgemmEpilogueTraitsHelper-members.html | 13 +- ..._1_1gemm_1_1IgemmEpilogueTraitsHelper.html | 47 +- ...ilogueTraits___00_01true_01_4-members.html | 43 +- ...01GemmEpilogueTraits___00_01true_01_4.html | 59 +- ..._1_1IgemmFloatToInt8Converter-members.html | 2 +- ..._1_1gemm_1_1IgemmFloatToInt8Converter.html | 2 +- ...gemm_1_1IgemmGlobalIteratorAb-members.html | 151 ++ ...lass_1_1gemm_1_1IgemmGlobalIteratorAb.html | 597 ++++++++ ...tlass_1_1gemm_1_1IgemmGlobalIteratorAb.png | Bin 0 -> 4485 bytes ...1_1IgemmGlobalLoadTransformer-members.html | 2 +- ...1_1gemm_1_1IgemmGlobalLoadTransformer.html | 2 +- ...ements___01_4_00_01float_01_4-members.html | 2 +- ...00_01kElements___01_4_00_01float_01_4.html | 2 +- ..._1IgemmGlobalStoreTransformer-members.html | 2 
+- ..._1gemm_1_1IgemmGlobalStoreTransformer.html | 2 +- ..._t_00_01kElements___01_4_01_4-members.html | 2 +- ..._01int8__t_00_01kElements___01_4_01_4.html | 2 +- ...gemm_1_1IgemmGlobalTileTraits-members.html | 105 ++ ...lass_1_1gemm_1_1IgemmGlobalTileTraits.html | 257 ++++ ...tlass_1_1gemm_1_1IgemmGlobalTileTraits.png | Bin 0 -> 2081 bytes ...balTileTraits_1_1ThreadOffset-members.html | 91 ++ ...IgemmGlobalTileTraits_1_1ThreadOffset.html | 132 ++ ..._1_1IgemmInt8ToFloatConverter-members.html | 2 +- ..._1_1gemm_1_1IgemmInt8ToFloatConverter.html | 2 +- ..._1IgemmSharedStoreTransformer-members.html | 2 +- ..._1gemm_1_1IgemmSharedStoreTransformer.html | 2 +- ...tlass_1_1gemm_1_1IgemmSwizzle-members.html | 2 +- ...structcutlass_1_1gemm_1_1IgemmSwizzle.html | 2 +- ...ass_1_1gemm_1_1IgemmTileTraitsHelperA.html | 14 +- ...lass_1_1gemm_1_1IgemmTileTraitsHelperA.png | Bin 1401 -> 1476 bytes ...emmConfig___00_01Index___01_4-members.html | 99 ++ ...r_00_01GemmConfig___00_01Index___01_4.html | 237 +++ ...or_00_01GemmConfig___00_01Index___01_4.png | Bin 0 -> 1837 bytes ...emmConfig___00_01Index___01_4-members.html | 98 ++ ...r_00_01GemmConfig___00_01Index___01_4.html | 273 ++++ ...ass_1_1gemm_1_1IgemmTileTraitsHelperB.html | 14 +- ...lass_1_1gemm_1_1IgemmTileTraitsHelperB.png | Bin 1400 -> 1476 bytes ...emmConfig___00_01Index___01_4-members.html | 98 ++ ...r_00_01GemmConfig___00_01Index___01_4.html | 273 ++++ ...emmConfig___00_01Index___01_4-members.html | 99 ++ ...r_00_01GemmConfig___00_01Index___01_4.html | 237 +++ ...or_00_01GemmConfig___00_01Index___01_4.png | Bin 0 -> 1727 bytes ...utlass_1_1gemm_1_1IgemmTraits-members.html | 19 +- .../structcutlass_1_1gemm_1_1IgemmTraits.html | 37 +- docs/structcutlass_1_1gemm_1_1IgemmTraits.png | Bin 3096 -> 3065 bytes ..._1_1gemm_1_1IgemmTraitsHelper-members.html | 42 +- ...tcutlass_1_1gemm_1_1IgemmTraitsHelper.html | 256 ++-- ...tcutlass_1_1gemm_1_1IgemmTransformerA.html | 2 +- ...umnMajor_00_01Iterator___01_4-members.html | 2 +- 
..._1_1kColumnMajor_00_01Iterator___01_4.html | 2 +- ...RowMajor_00_01Iterator___01_4-members.html | 2 +- ...out_1_1kRowMajor_00_01Iterator___01_4.html | 2 +- ...tcutlass_1_1gemm_1_1IgemmTransformerB.html | 2 +- ...umnMajor_00_01Iterator___01_4-members.html | 2 +- ..._1_1kColumnMajor_00_01Iterator___01_4.html | 2 +- ...RowMajor_00_01Iterator___01_4-members.html | 2 +- ...out_1_1kRowMajor_00_01Iterator___01_4.html | 2 +- ...ructcutlass_1_1gemm_1_1Launch-members.html | 91 ++ docs/structcutlass_1_1gemm_1_1Launch.html | 155 ++ ...unch_3_01Gemm_00_01false_01_4-members.html | 91 ++ ...mm_1_1Launch_3_01Gemm_00_01false_01_4.html | 155 ++ ...lass_1_1gemm_1_1LinearScaling-members.html | 16 +- ...tructcutlass_1_1gemm_1_1LinearScaling.html | 254 ++- ...structcutlass_1_1gemm_1_1LinearScaling.png | Bin 0 -> 1511 bytes ...emm_1_1LinearScalingDevicePtr-members.html | 103 ++ ...ass_1_1gemm_1_1LinearScalingDevicePtr.html | 232 +++ ...lass_1_1gemm_1_1LinearScalingDevicePtr.png | Bin 0 -> 1507 bytes ...mm_1_1LinearScaling_1_1Params-members.html | 6 +- ...ss_1_1gemm_1_1LinearScaling_1_1Params.html | 96 +- ...ructcutlass_1_1gemm_1_1ProjectOperand.html | 2 +- ...rand_1_1kA_00_01Kstrided_01_4-members.html | 2 +- ...1GemmOperand_1_1kA_00_01Kstrided_01_4.html | 2 +- ...rand_1_1kB_00_01Kstrided_01_4-members.html | 2 +- ...1GemmOperand_1_1kB_00_01Kstrided_01_4.html | 2 +- ...mOperand_1_1kC_00_01true_01_4-members.html | 2 +- ..._3_01GemmOperand_1_1kC_00_01true_01_4.html | 2 +- ...mOperand_1_1kD_00_01true_01_4-members.html | 2 +- ..._3_01GemmOperand_1_1kD_00_01true_01_4.html | 2 +- ...ass_1_1gemm_1_1ReshapeThreads-members.html | 2 +- ...ructcutlass_1_1gemm_1_1ReshapeThreads.html | 2 +- ...00_01Threads___00_01true_01_4-members.html | 2 +- ...1Tile___00_01Threads___00_01true_01_4.html | 2 +- ...1gemm_1_1RowMajorBlockSwizzle-members.html | 95 ++ ...tlass_1_1gemm_1_1RowMajorBlockSwizzle.html | 260 ++++ ...utlass_1_1gemm_1_1SgemmConfig-members.html | 59 +- 
.../structcutlass_1_1gemm_1_1SgemmConfig.html | 163 +- docs/structcutlass_1_1gemm_1_1SgemmConfig.png | Bin 2930 -> 3126 bytes ...lass_1_1gemm_1_1SgemmLBTraits-members.html | 114 ++ ...tructcutlass_1_1gemm_1_1SgemmLBTraits.html | 185 +++ ...structcutlass_1_1gemm_1_1SgemmLBTraits.png | Bin 0 -> 8497 bytes ...utlass_1_1gemm_1_1SgemmTraits-members.html | 19 +- .../structcutlass_1_1gemm_1_1SgemmTraits.html | 37 +- docs/structcutlass_1_1gemm_1_1SgemmTraits.png | Bin 8487 -> 8476 bytes ...s_1_1gemm_1_1SharedLoadStream-members.html | 31 +- ...ctcutlass_1_1gemm_1_1SharedLoadStream.html | 251 ++- ...1_1SharedLoadStream_1_1Params-members.html | 2 +- ...1_1gemm_1_1SharedLoadStream_1_1Params.html | 2 +- ...s_1_1gemm_1_1SharedStreamPair-members.html | 101 ++ ...ctcutlass_1_1gemm_1_1SharedStreamPair.html | 411 +++++ ...1_1SharedStreamPair_1_1Params-members.html | 92 ++ ...1_1gemm_1_1SharedStreamPair_1_1Params.html | 139 ++ ...1SimplifiedGemmEpilogueTraits-members.html | 34 +- ...1gemm_1_1SimplifiedGemmEpilogueTraits.html | 100 +- ..._1gemm_1_1SimplifiedGemmEpilogueTraits.png | Bin 3650 -> 3705 bytes ...1gemm_1_1SimplifiedGemmTraits-members.html | 15 +- ...tlass_1_1gemm_1_1SimplifiedGemmTraits.html | 27 +- ...1_1SimplifiedGemmTraitsHelper-members.html | 6 +- ...1_1gemm_1_1SimplifiedGemmTraitsHelper.html | 26 +- ..._1_1gemm_1_1ThreadMultiplyAdd-members.html | 31 +- ...tcutlass_1_1gemm_1_1ThreadMultiplyAdd.html | 201 +-- ...1half030e27fde4380ad93cd574bc743e0ba3.html | 103 ++ ...1half8ea66703da782e035d986e48031ff835.html | 103 ++ ...__00_01half_00_01half_00_01float_01_4.html | 401 +++++ ...___00_01half_00_01half_00_01half_01_4.html | 402 +++++ ...1int86f255c0f34c1afba22b3a7d64d8f85bf.html | 103 ++ ..._01int8__t_00_01int8__t_00_01int_01_4.html | 401 +++++ ...m_1_1WmmaGemmGlobalIteratorCd-members.html | 82 +- ...s_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html | 717 ++++----- ...ss_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png | Bin 1763 -> 2539 bytes ...mmaGemmGlobalIteratorCdTraits-members.html 
| 11 +- ...emm_1_1WmmaGemmGlobalIteratorCdTraits.html | 31 +- ...ratorCdTraits_1_1ThreadOffset-members.html | 2 +- ...lobalIteratorCdTraits_1_1ThreadOffset.html | 2 +- ...emmGlobalIteratorCd_1_1Params-members.html | 19 +- ...1_1WmmaGemmGlobalIteratorCd_1_1Params.html | 203 +-- ..._1_1WmmaGemmGlobalIteratorCd_1_1Params.png | Bin 0 -> 1460 bytes ...s_1_1gemm_1_1swizzleDirection-members.html | 93 ++ ...ctcutlass_1_1gemm_1_1swizzleDirection.html | 122 ++ docs/structcutlass_1_1int4__t.html | 92 ++ docs/structcutlass_1_1is__pow2-members.html | 2 +- docs/structcutlass_1_1is__pow2.html | 2 +- docs/structcutlass_1_1log2__down-members.html | 2 +- docs/structcutlass_1_1log2__down.html | 2 +- ..._3_01N_00_011_00_01Count_01_4-members.html | 2 +- ...g2__down_3_01N_00_011_00_01Count_01_4.html | 2 +- docs/structcutlass_1_1log2__up-members.html | 2 +- docs/structcutlass_1_1log2__up.html | 2 +- ..._3_01N_00_011_00_01Count_01_4-members.html | 2 +- ...log2__up_3_01N_00_011_00_01Count_01_4.html | 2 +- ...cutlass_1_1platform_1_1aligned__chunk.html | 2 +- ...1platform_1_1aligned__storage-members.html | 2 +- ...tlass_1_1platform_1_1aligned__storage.html | 2 +- ..._1_1platform_1_1alignment__of-members.html | 2 +- ...tcutlass_1_1platform_1_1alignment__of.html | 2 +- ...tform_1_1alignment__of_1_1pad-members.html | 2 +- ...s_1_1platform_1_1alignment__of_1_1pad.html | 2 +- ..._of_3_01const_01value__t_01_4-members.html | 2 +- ...ignment__of_3_01const_01value__t_01_4.html | 2 +- ...st_01volatile_01value__t_01_4-members.html | 2 +- ..._3_01const_01volatile_01value__t_01_4.html | 2 +- ...lignment__of_3_01double2_01_4-members.html | 2 +- ...orm_1_1alignment__of_3_01double2_01_4.html | 2 +- ...lignment__of_3_01double4_01_4-members.html | 2 +- ...orm_1_1alignment__of_3_01double4_01_4.html | 2 +- ...alignment__of_3_01float4_01_4-members.html | 2 +- ...form_1_1alignment__of_3_01float4_01_4.html | 2 +- ..._1alignment__of_3_01int4_01_4-members.html | 2 +- ...atform_1_1alignment__of_3_01int4_01_4.html | 2 
+- ...1alignment__of_3_01long4_01_4-members.html | 2 +- ...tform_1_1alignment__of_3_01long4_01_4.html | 2 +- ...gnment__of_3_01longlong2_01_4-members.html | 2 +- ...m_1_1alignment__of_3_01longlong2_01_4.html | 2 +- ...gnment__of_3_01longlong4_01_4-members.html | 2 +- ...m_1_1alignment__of_3_01longlong4_01_4.html | 2 +- ...1alignment__of_3_01uint4_01_4-members.html | 2 +- ...tform_1_1alignment__of_3_01uint4_01_4.html | 2 +- ...alignment__of_3_01ulong4_01_4-members.html | 2 +- ...form_1_1alignment__of_3_01ulong4_01_4.html | 2 +- ...nment__of_3_01ulonglong2_01_4-members.html | 2 +- ..._1_1alignment__of_3_01ulonglong2_01_4.html | 2 +- ...nment__of_3_01ulonglong4_01_4-members.html | 2 +- ..._1_1alignment__of_3_01ulonglong4_01_4.html | 2 +- ..._3_01volatile_01value__t_01_4-members.html | 2 +- ...ment__of_3_01volatile_01value__t_01_4.html | 2 +- ...1_1platform_1_1bool__constant-members.html | 2 +- ...cutlass_1_1platform_1_1bool__constant.html | 2 +- ...ss_1_1platform_1_1conditional-members.html | 2 +- ...uctcutlass_1_1platform_1_1conditional.html | 2 +- ..._3_01false_00_01T_00_01F_01_4-members.html | 2 +- ...ditional_3_01false_00_01T_00_01F_01_4.html | 2 +- ..._1platform_1_1default__delete-members.html | 2 +- ...utlass_1_1platform_1_1default__delete.html | 2 +- ..._1_1default__delete_3_01T[]_4-members.html | 2 +- ...platform_1_1default__delete_3_01T[]_4.html | 2 +- ...ass_1_1platform_1_1enable__if-members.html | 2 +- ...ructcutlass_1_1platform_1_1enable__if.html | 2 +- ...m_1_1enable__if_3_01false_00_01T_01_4.html | 2 +- ...utlass_1_1platform_1_1greater-members.html | 2 +- .../structcutlass_1_1platform_1_1greater.html | 2 +- ...latform_1_1integral__constant-members.html | 2 +- ...ass_1_1platform_1_1integral__constant.html | 2 +- ...1_1platform_1_1is__arithmetic-members.html | 2 +- ...cutlass_1_1platform_1_1is__arithmetic.html | 2 +- ...s_1_1platform_1_1is__base__of-members.html | 2 +- ...ctcutlass_1_1platform_1_1is__base__of.html | 2 +- 
...tform_1_1is__base__of__helper-members.html | 2 +- ...s_1_1platform_1_1is__base__of__helper.html | 2 +- ...is__base__of__helper_1_1dummy-members.html | 2 +- ...form_1_1is__base__of__helper_1_1dummy.html | 2 +- ...atform_1_1is__floating__point-members.html | 2 +- ...ss_1_1platform_1_1is__floating__point.html | 2 +- ..._1platform_1_1is__fundamental-members.html | 2 +- ...utlass_1_1platform_1_1is__fundamental.html | 2 +- ...s_1_1platform_1_1is__integral-members.html | 2 +- ...ctcutlass_1_1platform_1_1is__integral.html | 2 +- ...1_1is__integral_3_01char_01_4-members.html | 2 +- ...latform_1_1is__integral_3_01char_01_4.html | 2 +- ...__integral_3_01const_01T_01_4-members.html | 2 +- ...rm_1_1is__integral_3_01const_01T_01_4.html | 2 +- ...3_01const_01volatile_01T_01_4-members.html | 2 +- ...ntegral_3_01const_01volatile_01T_01_4.html | 2 +- ..._1_1is__integral_3_01int_01_4-members.html | 2 +- ...platform_1_1is__integral_3_01int_01_4.html | 2 +- ...1_1is__integral_3_01long_01_4-members.html | 2 +- ...latform_1_1is__integral_3_01long_01_4.html | 2 +- ...integral_3_01long_01long_01_4-members.html | 2 +- ..._1_1is__integral_3_01long_01long_01_4.html | 2 +- ..._1is__integral_3_01short_01_4-members.html | 2 +- ...atform_1_1is__integral_3_01short_01_4.html | 2 +- ...tegral_3_01signed_01char_01_4-members.html | 2 +- ..._1is__integral_3_01signed_01char_01_4.html | 2 +- ...gral_3_01unsigned_01char_01_4-members.html | 2 +- ...is__integral_3_01unsigned_01char_01_4.html | 2 +- ...egral_3_01unsigned_01int_01_4-members.html | 2 +- ...1is__integral_3_01unsigned_01int_01_4.html | 2 +- ...gral_3_01unsigned_01long_01_4-members.html | 2 +- ...is__integral_3_01unsigned_01long_01_4.html | 2 +- ...01unsigned_01long_01long_01_4-members.html | 2 +- ...egral_3_01unsigned_01long_01long_01_4.html | 2 +- ...ral_3_01unsigned_01short_01_4-members.html | 2 +- ...s__integral_3_01unsigned_01short_01_4.html | 2 +- ...ntegral_3_01volatile_01T_01_4-members.html | 2 +- 
...1_1is__integral_3_01volatile_01T_01_4.html | 2 +- ...ss_1_1platform_1_1is__pointer-members.html | 2 +- ...uctcutlass_1_1platform_1_1is__pointer.html | 2 +- ...atform_1_1is__pointer__helper-members.html | 2 +- ...ss_1_1platform_1_1is__pointer__helper.html | 2 +- ...inter__helper_3_01T_01_5_01_4-members.html | 2 +- ..._1is__pointer__helper_3_01T_01_5_01_4.html | 2 +- ...tlass_1_1platform_1_1is__same-members.html | 2 +- ...structcutlass_1_1platform_1_1is__same.html | 2 +- ...1_1is__same_3_01A_00_01A_01_4-members.html | 2 +- ...latform_1_1is__same_3_01A_00_01A_01_4.html | 2 +- ...rm_1_1is__trivially__copyable-members.html | 2 +- ..._1platform_1_1is__trivially__copyable.html | 2 +- ...tlass_1_1platform_1_1is__void-members.html | 2 +- ...structcutlass_1_1platform_1_1is__void.html | 2 +- ...s_1_1platform_1_1is__volatile-members.html | 2 +- ...ctcutlass_1_1platform_1_1is__volatile.html | 2 +- ...olatile_3_01volatile_01T_01_4-members.html | 2 +- ...1_1is__volatile_3_01volatile_01T_01_4.html | 2 +- ...ctcutlass_1_1platform_1_1less-members.html | 2 +- docs/structcutlass_1_1platform_1_1less.html | 2 +- ...ructcutlass_1_1platform_1_1nullptr__t.html | 2 +- ...ctcutlass_1_1platform_1_1plus-members.html | 2 +- docs/structcutlass_1_1platform_1_1plus.html | 2 +- ..._1_1platform_1_1remove__const-members.html | 2 +- ...tcutlass_1_1platform_1_1remove__const.html | 2 +- ...ove__const_3_01const_01T_01_4-members.html | 2 +- ...m_1_1remove__const_3_01const_01T_01_4.html | 2 +- ...ass_1_1platform_1_1remove__cv-members.html | 2 +- ...ructcutlass_1_1platform_1_1remove__cv.html | 2 +- ...1platform_1_1remove__volatile-members.html | 2 +- ...tlass_1_1platform_1_1remove__volatile.html | 2 +- ...olatile_3_01volatile_01T_01_4-members.html | 2 +- ...emove__volatile_3_01volatile_01T_01_4.html | 2 +- docs/structcutlass_1_1sqrt__est-members.html | 2 +- docs/structcutlass_1_1sqrt__est.html | 2 +- docs/structcutlass_1_1uint4__t.html | 92 ++ docs/tensor__ref_8h.html | 19 +- 
docs/tensor__ref_8h_source.html | 102 +- docs/tensor__ref__collection_8h.html | 115 ++ docs/tensor__ref__collection_8h_source.html | 146 ++ docs/tensor__view_8h.html | 15 +- docs/tensor__view_8h_source.html | 82 +- docs/thread__multiply__add_8h.html | 6 +- docs/thread__multiply__add_8h_source.html | 33 +- docs/threadblock__swizzle_8h.html | 126 ++ docs/threadblock__swizzle_8h_source.html | 120 ++ docs/tile__allocation_8h.html | 114 ++ docs/tile__allocation_8h_source.html | 121 ++ docs/tile__coord_8h.html | 107 ++ docs/tile__coord_8h_source.html | 128 ++ docs/tile__iterator_8h.html | 35 +- docs/tile__iterator_8h_source.html | 353 +++-- docs/tile__stream_8h.html | 130 ++ docs/tile__stream_8h_source.html | 162 ++ docs/tile__traits__standard_8h.html | 4 +- docs/tile__traits__standard_8h_source.html | 13 +- docs/unioncutlass_1_1Vector-members.html | 6 +- docs/unioncutlass_1_1Vector.html | 26 +- ..._01bin1__t_00_01kLanes___01_4-members.html | 98 ++ ...Vector_3_01bin1__t_00_01kLanes___01_4.html | 329 ++++ ..._1Vector_3_01half_00_011_01_4-members.html | 97 ++ ...utlass_1_1Vector_3_01half_00_011_01_4.html | 279 ++++ ...r_3_01half_00_01kLanes___01_4-members.html | 10 +- ...1_1Vector_3_01half_00_01kLanes___01_4.html | 68 +- ..._01int4__t_00_01kLanes___01_4-members.html | 98 ++ ...Vector_3_01int4__t_00_01kLanes___01_4.html | 329 ++++ ...01uint4__t_00_01kLanes___01_4-members.html | 98 ++ ...ector_3_01uint4__t_00_01kLanes___01_4.html | 329 ++++ ...Traits_1_1StreamSharedStorage-members.html | 10 +- ...EpilogueTraits_1_1StreamSharedStorage.html | 30 +- ..._1GemmTraits_1_1SharedStorage-members.html | 2 +- ..._1gemm_1_1GemmTraits_1_1SharedStorage.html | 6 +- docs/vector_8h.html | 34 +- docs/vector_8h_source.html | 90 +- docs/wmma__gemm__epilogue__traits_8h.html | 4 +- ...mma__gemm__epilogue__traits_8h_source.html | 14 +- docs/wmma__gemm__global__tile_8h.html | 4 +- docs/wmma__gemm__global__tile_8h_source.html | 92 +- docs/wmma__gemm__multiply__add_8h.html | 4 +- 
docs/wmma__gemm__multiply__add_8h_source.html | 11 +- docs/wmma__gemm__shared__tile_8h.html | 4 +- docs/wmma__gemm__shared__tile_8h_source.html | 20 +- docs/wmma__gemm__traits_8h.html | 4 +- docs/wmma__gemm__traits_8h_source.html | 24 +- docs/wmma__matrix_8h.html | 2 +- docs/wmma__matrix_8h_source.html | 12 +- docs/zip__fragment_8h.html | 127 ++ docs/zip__fragment_8h_source.html | 114 ++ docs/zip__tensor__ref_8h.html | 116 ++ docs/zip__tensor__ref_8h_source.html | 99 ++ docs/zip__tile__iterator_8h.html | 113 ++ docs/zip__tile__iterator_8h_source.html | 139 ++ 1121 files changed, 64211 insertions(+), 12918 deletions(-) create mode 100644 docs/classcutlass_1_1TensorRef.png create mode 100644 docs/classcutlass_1_1TensorRefArray_1_1ConstIterator-members.html create mode 100644 docs/classcutlass_1_1TensorRefArray_1_1ConstIterator.html create mode 100644 docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator-members.html create mode 100644 docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator.html create mode 100644 docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4-members.html create mode 100644 docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4.html create mode 100644 docs/classcutlass_1_1ZipTileIterator-members.html create mode 100644 docs/classcutlass_1_1ZipTileIterator.html create mode 100644 docs/classcutlass_1_1detail_1_1ScalarOrPointer-members.html create mode 100644 docs/classcutlass_1_1detail_1_1ScalarOrPointer.html create mode 100644 docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params-members.html create mode 100644 docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params.html create mode 100644 docs/classcutlass_1_1platform_1_1complex-members.html create mode 100644 docs/classcutlass_1_1platform_1_1complex.html create mode 100644 docs/complex_8h.html create mode 100644 docs/complex_8h_source.html create mode 
100644 docs/fp16__sgemm__multiply__add_8h.html create mode 100644 docs/fp16__sgemm__multiply__add_8h_source.html create mode 100644 docs/fp16__sgemm__traits_8h.html create mode 100644 docs/fp16__sgemm__traits_8h_source.html create mode 100644 docs/functions_func_k.html create mode 100644 docs/functions_func_n.html create mode 100644 docs/functions_func_z.html create mode 100644 docs/functions_type_k.html create mode 100644 docs/functions_vars_o.html create mode 100644 docs/functions_z.html create mode 100644 docs/gemm__config_8h.html create mode 100644 docs/gemm__config_8h_source.html create mode 100644 docs/gemm__coord_8h.html create mode 100644 docs/gemm__coord_8h_source.html create mode 100644 docs/gemm__desc_8h.html create mode 100644 docs/gemm__desc_8h_source.html create mode 100644 docs/gemm__stream__pair_8h.html create mode 100644 docs/gemm__stream__pair_8h_source.html create mode 100644 docs/group__IdentityBlockSwizzle.html create mode 100644 docs/kernel__launch_8h.html create mode 100644 docs/kernel__launch_8h_source.html create mode 100644 docs/linear__scaling__device__ptr_8h.html create mode 100644 docs/linear__scaling__device__ptr_8h_source.html create mode 100644 docs/namespacecutlass_1_1detail.html create mode 100644 docs/numeric__types_8h.html create mode 100644 docs/numeric__types_8h_source.html create mode 100644 docs/scalar__or__pointer_8h.html create mode 100644 docs/scalar__or__pointer_8h_source.html create mode 100644 docs/search/all_18.html create mode 100644 docs/search/all_18.js create mode 100644 docs/search/classes_13.html create mode 100644 docs/search/classes_13.js create mode 100644 docs/search/classes_14.html create mode 100644 docs/search/classes_14.js create mode 100644 docs/search/enumvalues_4.html create mode 100644 docs/search/enumvalues_4.js create mode 100644 docs/search/enumvalues_5.html create mode 100644 docs/search/enumvalues_5.js create mode 100644 docs/search/files_10.html create mode 100644 docs/search/files_10.js create 
mode 100644 docs/search/files_f.html create mode 100644 docs/search/files_f.js create mode 100644 docs/search/functions_15.html create mode 100644 docs/search/functions_15.js create mode 100644 docs/search/functions_16.html create mode 100644 docs/search/functions_16.js create mode 100644 docs/search/functions_17.html create mode 100644 docs/search/functions_17.js create mode 100644 docs/search/groups_4.html create mode 100644 docs/search/groups_4.js create mode 100644 docs/search/typedefs_12.html create mode 100644 docs/search/typedefs_12.js create mode 100644 docs/search/variables_11.html create mode 100644 docs/search/variables_11.js create mode 100644 docs/structDebugType.html create mode 100644 docs/structDebugValue.html create mode 100644 docs/structcutlass_1_1DumpType.html create mode 100644 docs/structcutlass_1_1FragmentElementType-members.html create mode 100644 docs/structcutlass_1_1FragmentElementType.html create mode 100644 docs/structcutlass_1_1IdentityTensorMapFunc-members.html create mode 100644 docs/structcutlass_1_1IdentityTensorMapFunc.html create mode 100644 docs/structcutlass_1_1KernelLaunchConfiguration-members.html create mode 100644 docs/structcutlass_1_1KernelLaunchConfiguration.html create mode 100644 docs/structcutlass_1_1KernelLaunchConfiguration.png create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kS1b28106546bd22002cd52e60197548ce.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kS3de71ba9ef724a37fb1cf315da8bf9b5.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kS840de374cd1e3ee3a4aa6a70a9fa83d1.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kS91ff65dd36f118bca4542df6128d6d99.html create mode 100644 
docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kS966cdf9ba449f056a92458bf87878053.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kSc8e65d97c8f615e98ac2e03f0bcd236e.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kSf1f63874ddc1302ed9e60e0478e1a8ad.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kSf2952bcb62d20c76c595eac4c59f7239.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kW847237836867f08e1121b00b7e44d8ae.html create mode 100644 docs/structcutlass_1_1Load_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kWe58d0048e24352beeec002fd483c53b2.html create mode 100644 docs/structcutlass_1_1Load_3_01Vector_3_01bin1__t_00_0132_01_4_00_01kAccessSize_00_01Memory___00_01Fr16ad55dd7bca84e6a7fc608c26eee889.html create mode 100644 docs/structcutlass_1_1Load_3_01Vector_3_01bin1__t_00_0132_01_4_00_01kAccessSize_00_01Memory___00_01Fra240d9c28383cd3945277ec3a927c538.html create mode 100644 docs/structcutlass_1_1Load_3_01Vector_3_01int4__t_00_018_01_4_00_01kAccessSize_00_01Memory___00_01Fra03475418f27732d726d18feb23feeed2.html create mode 100644 docs/structcutlass_1_1Load_3_01Vector_3_01int4__t_00_018_01_4_00_01kAccessSize_00_01Memory___00_01Fra934529165fa8fecdd392b5302d25ef26.html create mode 100644 docs/structcutlass_1_1Load_3_01Vector_3_01uint4__t_00_018_01_4_00_01kAccessSize_00_01Memory___00_01Fr23088b868c6eeec9377c46892553686f.html create mode 100644 docs/structcutlass_1_1Load_3_01Vector_3_01uint4__t_00_018_01_4_00_01kAccessSize_00_01Memory___00_01Frc8b0aa6ddd9b4317158c26574a6881de.html create mode 100644 
docs/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01FragmentElementType_1_1kScalar_00_01d9e675253ca19588f1ae4bd898579523f.html create mode 100644 docs/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01FragmentElementType_1_1kScalar_00_01double_00_01kStride_00_0116_01_4.html create mode 100644 docs/structcutlass_1_1MatrixCoord-members.html create mode 100644 docs/structcutlass_1_1MatrixCoord.html create mode 100644 docs/structcutlass_1_1MatrixCoord.png create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ColumnMajor-members.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ColumnMajor.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ColumnMajorBlockLinear-members.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ColumnMajorBlockLinear.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ColumnMajorInterleaved-members.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ColumnMajorInterleaved.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ContiguousLayout-members.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1ContiguousLayout.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1RowMajor-members.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1RowMajor.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1RowMajorBlockLinear-members.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1RowMajorBlockLinear.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1RowMajorInterleaved-members.html create mode 100644 docs/structcutlass_1_1MatrixLayout_1_1RowMajorInterleaved.html create mode 100644 docs/structcutlass_1_1MatrixTransform-members.html create mode 100644 docs/structcutlass_1_1MatrixTransform.html create mode 100644 docs/structcutlass_1_1Max-members.html create mode 100644 docs/structcutlass_1_1Max.html create mode 100644 docs/structcutlass_1_1Min-members.html create mode 100644 docs/structcutlass_1_1Min.html 
create mode 100644 docs/structcutlass_1_1PredicatedTileLoadStream-members.html create mode 100644 docs/structcutlass_1_1PredicatedTileLoadStream.html create mode 100644 docs/structcutlass_1_1PredicatedTileLoadStream.png create mode 100644 docs/structcutlass_1_1PredicatedTileStoreStream-members.html create mode 100644 docs/structcutlass_1_1PredicatedTileStoreStream.html create mode 100644 docs/structcutlass_1_1PredicatedTileStoreStream.png create mode 100644 docs/structcutlass_1_1RegularTilePredicateFunctor-members.html create mode 100644 docs/structcutlass_1_1RegularTilePredicateFunctor.html create mode 100644 docs/structcutlass_1_1ScalarIO-members.html create mode 100644 docs/structcutlass_1_1ScalarIO.html create mode 100644 docs/structcutlass_1_1ShapeDivCeiling-members.html create mode 100644 docs/structcutlass_1_1ShapeDivCeiling.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k004b304998a534d76357f834068909f8.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k12f5c8a016a307e76de374322fc00a66.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k220d5790f803f10840e2a92fb9a51dac.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k28cc0b88a16efca73d258128312d2a7e.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k40d038d4bce377843c21a56ebf97d011.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k60eedca420c41e94fd40b41299967ef2.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k775a1d27affec5236489735ed4503c92.html create mode 100644 
docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1k84da7dcd68ee74b8d2bdb67885b0ca56.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1kd9a7e85f80a21c504388612a60462417.html create mode 100644 docs/structcutlass_1_1Store_3_01Scalar___00_01kAccessSize_00_01Memory___00_01FragmentElementType_1_1ke6d73d34fa7b5254cf828804a19842e1.html create mode 100644 docs/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01FragmentElementType_1_1kScalar_00_013d38935f41bf709e067932b9e042255a.html create mode 100644 docs/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01FragmentElementType_1_1kScalar_00_0160391c6be5cb1d3f99e012a6a18e486d.html create mode 100644 docs/structcutlass_1_1TensorRefArray-members.html create mode 100644 docs/structcutlass_1_1TensorRefArray.html create mode 100644 docs/structcutlass_1_1TensorRefBatchStrided-members.html create mode 100644 docs/structcutlass_1_1TensorRefBatchStrided.html create mode 100644 docs/structcutlass_1_1TensorRefBatchStrided.png create mode 100644 docs/structcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4_1_1StrideVector.html create mode 100644 docs/structcutlass_1_1TileAllocation-members.html create mode 100644 docs/structcutlass_1_1TileAllocation.html create mode 100644 docs/structcutlass_1_1TileCoord-members.html create mode 100644 docs/structcutlass_1_1TileCoord.html create mode 100644 docs/structcutlass_1_1TileCoord.png create mode 100644 docs/structcutlass_1_1TileLoadStream-members.html create mode 100644 docs/structcutlass_1_1TileLoadStream.html create mode 100644 docs/structcutlass_1_1TileLoadStream.png create mode 100644 docs/structcutlass_1_1TileLoadStream_1_1Params-members.html create mode 100644 docs/structcutlass_1_1TileLoadStream_1_1Params.html create mode 100644 docs/structcutlass_1_1TileLoadStream_1_1PredicateVector.html create 
mode 100644 docs/structcutlass_1_1TileStoreStream-members.html create mode 100644 docs/structcutlass_1_1TileStoreStream.html create mode 100644 docs/structcutlass_1_1TileStoreStream.png create mode 100644 docs/structcutlass_1_1TileStoreStream_1_1Params-members.html create mode 100644 docs/structcutlass_1_1TileStoreStream_1_1Params.html create mode 100644 docs/structcutlass_1_1TileStoreStream_1_1PredicateVector.html create mode 100644 docs/structcutlass_1_1Vectorize_3_01Vector_3_01bin1__t_00_0132_01_4_00_01kLanes___01_4-members.html create mode 100644 docs/structcutlass_1_1Vectorize_3_01Vector_3_01bin1__t_00_0132_01_4_00_01kLanes___01_4.html create mode 100644 docs/structcutlass_1_1Vectorize_3_01Vector_3_01int4__t_00_018_01_4_00_01kLanes___01_4-members.html create mode 100644 docs/structcutlass_1_1Vectorize_3_01Vector_3_01int4__t_00_018_01_4_00_01kLanes___01_4.html create mode 100644 docs/structcutlass_1_1Vectorize_3_01Vector_3_01uint4__t_00_018_01_4_00_01kLanes___01_4-members.html create mode 100644 docs/structcutlass_1_1Vectorize_3_01Vector_3_01uint4__t_00_018_01_4_00_01kLanes___01_4.html create mode 100644 docs/structcutlass_1_1ZipConvert-members.html create mode 100644 docs/structcutlass_1_1ZipConvert.html create mode 100644 docs/structcutlass_1_1ZipFragment-members.html create mode 100644 docs/structcutlass_1_1ZipFragment.html create mode 100644 docs/structcutlass_1_1ZipTensorRef-members.html create mode 100644 docs/structcutlass_1_1ZipTensorRef.html create mode 100644 docs/structcutlass_1_1ZipTileAllocation-members.html create mode 100644 docs/structcutlass_1_1ZipTileAllocation.html create mode 100644 docs/structcutlass_1_1ZipTileIterator_1_1Params-members.html create mode 100644 docs/structcutlass_1_1ZipTileIterator_1_1Params.html create mode 100644 docs/structcutlass_1_1bin1__t.html create mode 100644 docs/structcutlass_1_1gemm_1_1ColumnMajorBlockSwizzle-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1ColumnMajorBlockSwizzle.html create mode 
100644 docs/structcutlass_1_1gemm_1_1Fp16SgemmConfig-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1Fp16SgemmConfig.html create mode 100644 docs/structcutlass_1_1gemm_1_1Fp16SgemmConfig.png create mode 100644 docs/structcutlass_1_1gemm_1_1Fp16SgemmSgemmTraits-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1Fp16SgemmSgemmTraits.html create mode 100644 docs/structcutlass_1_1gemm_1_1Fp16SgemmSgemmTraits.png create mode 100644 docs/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_00_01half_00_01true_01_4-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_00_01half_00_01true_01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1GemmCoord-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1GemmCoord.html create mode 100644 docs/structcutlass_1_1gemm_1_1GemmCoord.png create mode 100644 docs/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.png create mode 100644 docs/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.png create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStreamPair-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStreamPair.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStreamPair_1_1Params-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStreamPair_1_1Params.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStreamPair_1_1SharedStorage-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStreamPair_1_1SharedStorage.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStream_1_1Params-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStream_1_1Params.html create mode 100644 docs/structcutlass_1_1gemm_1_1GlobalLoadStream_1_1SharedStorage.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01ThreadGemmShape___01_4-members.html create mode 100644 
docs/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01ThreadGemmShape___01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01ThreadGemmShape___01_4.png create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalIteratorAb-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalIteratorAb.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalIteratorAb.png create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalTileTraits-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalTileTraits.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalTileTraits.png create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalTileTraits_1_1ThreadOffset-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmGlobalTileTraits_1_1ThreadOffset.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___00_01Index___01_4-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___00_01Index___01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___00_01Index___01_4.png create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___00_01Index___01_4-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___00_01Index___01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___00_01Index___01_4-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___00_01Index___01_4.html create mode 100644 
docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___00_01Index___01_4-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___00_01Index___01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___00_01Index___01_4.png create mode 100644 docs/structcutlass_1_1gemm_1_1Launch-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1Launch.html create mode 100644 docs/structcutlass_1_1gemm_1_1Launch_3_01Gemm_00_01false_01_4-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1Launch_3_01Gemm_00_01false_01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1LinearScaling.png create mode 100644 docs/structcutlass_1_1gemm_1_1LinearScalingDevicePtr-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1LinearScalingDevicePtr.html create mode 100644 docs/structcutlass_1_1gemm_1_1LinearScalingDevicePtr.png create mode 100644 docs/structcutlass_1_1gemm_1_1RowMajorBlockSwizzle-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1RowMajorBlockSwizzle.html create mode 100644 docs/structcutlass_1_1gemm_1_1SgemmLBTraits-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1SgemmLBTraits.html create mode 100644 docs/structcutlass_1_1gemm_1_1SgemmLBTraits.png create mode 100644 docs/structcutlass_1_1gemm_1_1SharedStreamPair-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1SharedStreamPair.html create mode 100644 docs/structcutlass_1_1gemm_1_1SharedStreamPair_1_1Params-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1SharedStreamPair_1_1Params.html create mode 100644 docs/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01ThreadGemmShape___00_01ThreadsPerWarp___00_01half030e27fde4380ad93cd574bc743e0ba3.html create mode 100644 
docs/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01ThreadGemmShape___00_01ThreadsPerWarp___00_01half8ea66703da782e035d986e48031ff835.html create mode 100644 docs/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01ThreadGemmShape___00_01ThreadsPerWarp___00_01half_00_01half_00_01float_01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01ThreadGemmShape___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01ThreadGemmShape___00_01ThreadsPerWarp___00_01int86f255c0f34c1afba22b3a7d64d8f85bf.html create mode 100644 docs/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01ThreadGemmShape___00_01ThreadsPerWarp___00_01int8__t_00_01int8__t_00_01int_01_4.html create mode 100644 docs/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.png create mode 100644 docs/structcutlass_1_1gemm_1_1swizzleDirection-members.html create mode 100644 docs/structcutlass_1_1gemm_1_1swizzleDirection.html create mode 100644 docs/structcutlass_1_1int4__t.html create mode 100644 docs/structcutlass_1_1uint4__t.html create mode 100644 docs/tensor__ref__collection_8h.html create mode 100644 docs/tensor__ref__collection_8h_source.html create mode 100644 docs/threadblock__swizzle_8h.html create mode 100644 docs/threadblock__swizzle_8h_source.html create mode 100644 docs/tile__allocation_8h.html create mode 100644 docs/tile__allocation_8h_source.html create mode 100644 docs/tile__coord_8h.html create mode 100644 docs/tile__coord_8h_source.html create mode 100644 docs/tile__stream_8h.html create mode 100644 docs/tile__stream_8h_source.html create mode 100644 docs/unioncutlass_1_1Vector_3_01bin1__t_00_01kLanes___01_4-members.html create mode 100644 docs/unioncutlass_1_1Vector_3_01bin1__t_00_01kLanes___01_4.html create mode 100644 docs/unioncutlass_1_1Vector_3_01half_00_011_01_4-members.html create mode 100644 docs/unioncutlass_1_1Vector_3_01half_00_011_01_4.html create mode 100644 
docs/unioncutlass_1_1Vector_3_01int4__t_00_01kLanes___01_4-members.html create mode 100644 docs/unioncutlass_1_1Vector_3_01int4__t_00_01kLanes___01_4.html create mode 100644 docs/unioncutlass_1_1Vector_3_01uint4__t_00_01kLanes___01_4-members.html create mode 100644 docs/unioncutlass_1_1Vector_3_01uint4__t_00_01kLanes___01_4.html create mode 100644 docs/zip__fragment_8h.html create mode 100644 docs/zip__fragment_8h_source.html create mode 100644 docs/zip__tensor__ref_8h.html create mode 100644 docs/zip__tensor__ref_8h_source.html create mode 100644 docs/zip__tile__iterator_8h.html create mode 100644 docs/zip__tile__iterator_8h_source.html diff --git a/docs/annotated.html b/docs/annotated.html index e6c405d59..da54a8ee0 100644 --- a/docs/annotated.html +++ b/docs/annotated.html @@ -74,303 +74,368 @@ $(function() {

Here are the classes, structs, unions and interfaces with brief descriptions:
[detail level 1234]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Ncutlass
 Ngemm
 Nplatform
 CAlignedStruct
 CComputeOffsetFromShapeCompute the offset for the given coordinates in a cube
 CComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 CComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 CComputeOffsetFromStridesCompute the offset for the given coordinates in a cube
 CComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 CComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 CComputeThreadOffsetFromStridesDecompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_
 CComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >Specialization for D=1 and C=1
 CComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >Specialization for D=1
 CConstPredicateTileAdapterAdapter to enable random access to predicates via logical coordinate within a tile
 CConvert
 CConvert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
 CCoordStatically-sized array specifying Coords within a tensor
 CCopy
 Cdivide_assert
 CExtentReturns the extent of a scalar or vector
 CExtent< Vector< T, Lanes > >Returns the number of lanes of a vector if need be
 CExtent< Vector< T, Lanes > const >Returns the number of lanes of a vector if need be
 CFragmentA template defining Fragment Concept
 CFragmentConstIterator
 CFragmentIteratorA template defining Fragment Iterator Concept
 CFragmentLoad
 CFragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentStore
 CFragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CGemmOperandGemm operand - D = A * B + C
 CIdentityDescribes identity elements
 Cis_pow2
 CIteratorAdvanceSpecifies dimension in which post-increment accesses advance
 CIteratorFragmentSpecifies whether iterator storage fragment consists of Scalar values or WMMA matrix
 CLoad
 CLoad< double, 2, Memory_, true, 16 >
 CLoad< Scalar_, Lanes_, Memory_, true, 16 >
 CLoad< Scalar_, Lanes_, Memory_, true, 4 >
 CLoad< Scalar_, Lanes_, Memory_, true, 8 >
 Clog2_down
 Clog2_down< N, 1, Count >
 Clog2_up
 Clog2_up< N, 1, Count >
 CMatrixLayoutDescribes layouts of matrices
 CMemorySpaceEnum to specify which memory space data resides in
 CPredicateTileAdapterAdapter to enable random access to predicates via logical coordinate within a tile
 CPredicateVectorStatically sized array of bits implementing
 CReshapeTile
 CReshapeTile< Tile_, kAccessSize_, true >
 CShapeA Shape implementing Layout Concept describing the dimensions of a cube
 CShapeAdd
 CShapeCountCompute derived counted of a Layout Concept based class
 CShapeDiv
 CShapeMax
 CShapeMin
 CShapeMul
 CShapeScale
 CShapeStrides
 CShapeSub
 Csqrt_est
 CStorageType
 CStorageType< 1 >
 CStorageType< 2 >
 CStorageType< 4 >
 CStore
 CStore< double, 2, Memory_, true, 16 >
 CStore< Scalar_, Lanes_, Memory_, true, 16 >
 CStore< Scalar_, Lanes_, Memory_, true, 4 >
 CStore< Scalar_, Lanes_, Memory_, true, 8 >
 CTensorRefStructure modeling a pointer and stride into a tensor
 CTensorViewHost-side reference implementation of tensor operations
 CTiledThreadOffsetBasic thread offset function computed from a thread shape
 CTileIteratorBaseIterator for accessing a stripmined tile in memory
 CTileLoadIteratorAn iterator implementing Tile Load Iterator Concept for loading a tile from memory
 CTileStoreIteratorAn iterator implementing Tile Store Iterator Concept for storing a tile to memory
 CTileTraitsA template defining Tile Traits Concept
 CTileTraitsContiguousMajor
 CTileTraitsStandardChooses 'best' shape to enable warp raking along contiguous dimension if possible
 CTileTraitsStrideMajor
 CTileTraitsWarpRakeTiling in which warps rake across the contiguous dimension
 CTrivialPredicateTileAdapterAlways returns true predicate
 CVector
 CVector< half, kLanes_ >
 CVectorize
 CVectorize< Element_, 1 >
 CVectorTraitsTraits describing properties of vectors and scalar-as-vectors
 CVectorTraits< Vector< T, Lanes > >Partial specialization for actual cutlass::Vector
 CVectorTraits< Vector< T, Lanes > const >Partial specialization for actual cutlass::Vector
 Ncutlass
 CDebugType
 CDebugValue
diff --git a/docs/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html b/docs/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html index 860cd05cb..18f59fc0c 100644 --- a/docs/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html +++ b/docs/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html @@ -91,7 +91,7 @@ $(function() { diff --git a/docs/classcutlass_1_1PredicateVector_1_1ConstIterator.html b/docs/classcutlass_1_1PredicateVector_1_1ConstIterator.html index 1fbdc759c..7e7089a06 100644 --- a/docs/classcutlass_1_1PredicateVector_1_1ConstIterator.html +++ b/docs/classcutlass_1_1PredicateVector_1_1ConstIterator.html @@ -381,7 +381,7 @@ template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ diff --git a/docs/classcutlass_1_1PredicateVector_1_1Iterator-members.html b/docs/classcutlass_1_1PredicateVector_1_1Iterator-members.html index ca3ff04aa..73d0ebcaa 100644 --- a/docs/classcutlass_1_1PredicateVector_1_1Iterator-members.html +++ b/docs/classcutlass_1_1PredicateVector_1_1Iterator-members.html @@ -93,7 +93,7 @@ $(function() { diff --git a/docs/classcutlass_1_1PredicateVector_1_1Iterator.html b/docs/classcutlass_1_1PredicateVector_1_1Iterator.html index 42a069382..2cbc797d8 100644 --- a/docs/classcutlass_1_1PredicateVector_1_1Iterator.html +++ b/docs/classcutlass_1_1PredicateVector_1_1Iterator.html @@ -443,7 +443,7 @@ template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ diff --git a/docs/classcutlass_1_1TensorRef-members.html b/docs/classcutlass_1_1TensorRef-members.html index 4bf37ad13..202c9ab42 100644 --- a/docs/classcutlass_1_1TensorRef-members.html +++ b/docs/classcutlass_1_1TensorRef-members.html @@ -73,35 +73,52 @@ $(function() {
-
cutlass::TensorRef< Storage_, Rank_ > Member List
+
cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > Member List
-

This is the complete list of members for cutlass::TensorRef< Storage_, Rank_ >, including all inherited members.

+

This is the complete list of members for cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >, including all inherited members.

- - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
advance(Coord< Rank > const &b)cutlass::TensorRef< Storage_, Rank_ >inline
at(Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
at(int idx) constcutlass::TensorRef< Storage_, Rank_ >inline
convert()cutlass::TensorRef< Storage_, Rank_ >inline
data() constcutlass::TensorRef< Storage_, Rank_ >inline
good() constcutlass::TensorRef< Storage_, Rank_ >inline
leading_dim() constcutlass::TensorRef< Storage_, Rank_ >inline
offset(Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
operator+(Coord< Rank > const &b) constcutlass::TensorRef< Storage_, Rank_ >inline
operator-(Coord< Rank > const &b) constcutlass::TensorRef< Storage_, Rank_ >inline
operator[](Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
operator[](int idx) constcutlass::TensorRef< Storage_, Rank_ >inline
Rankcutlass::TensorRef< Storage_, Rank_ >static
reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))cutlass::TensorRef< Storage_, Rank_ >inline
Storage typedefcutlass::TensorRef< Storage_, Rank_ >
stride() constcutlass::TensorRef< Storage_, Rank_ >inline
stride(int dim) constcutlass::TensorRef< Storage_, Rank_ >inline
TensorRef()cutlass::TensorRef< Storage_, Rank_ >inline
TensorRef(Storage *ptr, Coord< Rank > stride)cutlass::TensorRef< Storage_, Rank_ >inline
add_pointer_offset(LongIndex delta)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
at(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
at(LongIndex idx) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
const_ref() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
ConstTensorRef typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
Coord_t typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
data() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
good() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Index typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
kRankcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >static
kStorageRankcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >static
leading_dim(int idx=0) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
LongIndex typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
map(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
MapFunc typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
offset(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator+(TensorCoord const &b) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator+=(TensorCoord const &b)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator-(TensorCoord const &b) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator-=(TensorCoord const &b)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator[](TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator[](LongIndex idx) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Rankcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >static
reset(Storage *ptr=nullptr)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
reset(Storage *ptr, StorageCoord const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Storage typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
StorageCoord typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
stride() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
stride(int dim) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
StrideVector typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
TensorCoord typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
TensorRef(Storage *ptr=nullptr)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorRef(Storage *ptr, Index ldm)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorRef(Storage *ptr, StrideVector const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorRef(Storage *ptr, StorageCoord const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorRef(TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const &ref)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
diff --git a/docs/classcutlass_1_1TensorRef.html b/docs/classcutlass_1_1TensorRef.html index 05a9b3dd5..1053ca0a9 100644 --- a/docs/classcutlass_1_1TensorRef.html +++ b/docs/classcutlass_1_1TensorRef.html @@ -5,7 +5,7 @@ -Cutlass: cutlass::TensorRef< Storage_, Rank_ > Class Template Reference +Cutlass: cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > Class Template Reference @@ -78,93 +78,278 @@ $(function() { Static Public Attributes | List of all members
-
cutlass::TensorRef< Storage_, Rank_ > Class Template Reference
+
cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > Class Template Reference
-

Structure modeling a pointer and stride into a tensor. -

-

#include <tensor_ref.h>

+
+Inheritance diagram for cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >:
+
+
+ + +cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > +cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > + +
- - - + + + + + + + + + + + + + + + + + + + + + + + + + +

Public Types

typedef Storage_ Storage
 Data type of individual access. More...
 
typedef Storage_ Storage
 Data type of individual access. More...
 
typedef MapFunc_ MapFunc
 Mapping function from logical coordinate to internal n-D array. More...
 
typedef Index_ Index
 Index type. More...
 
typedef LongIndex_ LongIndex
 Typically, strides in memory can be very large. More...
 
typedef Coord< kRankTensorCoord
 Coordinate in logical tensor space. More...
 
typedef Coord< kStorageRankStorageCoord
 Coordinate in storage n-D array. More...
 
typedef Coord< kStorageRank - 1 > StrideVector
 
typedef TensorRef< typename platform::remove_const< Storage >::type const, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > ConstTensorRef
 Tensor reference to of constant value. More...
 
typedef TensorCoord Coord_t
 Coordinate in logical tensor space. More...
 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Public Member Functions

CUTLASS_HOST_DEVICE TensorRef ()
 Default ctor. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Coord< Rank > stride)
 Constructs from a pointer, size, and stride. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
 Updates the pointer, stride, and location within a TensorRef. More...
 
template<typename T >
TensorRef< T, Rankconvert ()
 Conversion function. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storagedata () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE Coord< Rank > const & stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE int leading_dim () const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE long long offset (Coord< Rank > const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storageat (Coord< Rank > const &coord) const
 Returns a reference to the element at a given Coord. More...
 
Storageoperator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE Storageat (int idx) const
 Returns a reference to the element at a given Coord. More...
 
Storageoperator[] (int idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorRefadvance (Coord< Rank > const &b)
 Adds an offset to the pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr=nullptr)
 Helper for 1-D memory. All higher ranks are projected onto the fastest changing rank. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Index ldm)
 Helper to construct from a pointer and single stride element for 2-D pitch linear memory. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, StrideVector const &stride)
 Constructs from a single pointer and stride vector. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, StorageCoord const &stride)
 
CUTLASS_HOST_DEVICE TensorRef (TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const &ref)
 Enables conversion from TensorRef of non-const type. More...
 
CUTLASS_HOST_DEVICE ConstTensorRef const_ref () const
 Returns a reference to constant-valued tensor. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr)
 Updates only the pointer. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr, StorageCoord const &stride)
 Updates the pointer, stride, and location within a TensorRef. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storagedata () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE StorageCoord stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE Index stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE Index leading_dim (int idx=0) const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE StorageCoord map (TensorCoord const &coord) const
 Maps a logical coordinate to an n-D array in memory. More...
 
CUTLASS_HOST_DEVICE LongIndex offset (TensorCoord const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storageat (TensorCoord const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageat (LongIndex idx) const
 Returns a reference to the element at a given linear index. More...
 
CUTLASS_HOST_DEVICE Storageoperator[] (TensorCoord const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageoperator[] (LongIndex idx) const
 Returns a reference to the element at a given linear index. More...
 
CUTLASS_HOST_DEVICE TensorRefadd_pointer_offset (LongIndex delta)
 Adds an offset to each pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (TensorCoord const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator+= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (TensorCoord const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator-= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
- - - + + + + + + + + +

Static Public Attributes

static int const Rank = Rank_
 Rank of tensor. More...
 
static int const kRank = Rank_
 Logical rank of tensor index space. More...
 
static int const kStorageRank = StorageRank_
 Rank of internal storage. More...
 
static int const Rank = kRank
 Logical rank of tensor index space. More...
 

Member Typedef Documentation

- -

◆ Storage

+ +

◆ ConstTensorRef

-template<typename Storage_, int Rank_>
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
- + + +
typedef Storage_ cutlass::TensorRef< Storage_, Rank_ >::Storagetypedef TensorRef< typename platform::remove_const<Storage>::type const, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_> cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstTensorRef
+
+ +
+
+ +

◆ Coord_t

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef TensorCoord cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Coord_t
+
+

Require at least rank=1. Mathematically, a rank=0 tensor would be considered to be a scalar, but degenerate cases such as these are difficult to accommodate without extensive C++ metaprogramming or support for zero-length arrays.

+ +
+
+ +

◆ Index

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Index_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Index
+
+ +
+
+ +

◆ LongIndex

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef LongIndex_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::LongIndex
+
+ +
+
+ +

◆ MapFunc

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef MapFunc_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::MapFunc
+
+ +
+
+ +

◆ Storage

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Storage_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Storage
+
+ +
+
+ +

◆ StorageCoord

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Coord<kStorageRank> cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::StorageCoord
+
+ +
+
+ +

◆ StrideVector

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Coord<kStorageRank - 1> cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::StrideVector
+
+

Stride vector in storage coordinage space - assumes least significant stride is 1 and does not store it.

+ +
+
+ +

◆ TensorCoord

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + +
typedef Coord<kRank> cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorCoord
@@ -172,21 +357,22 @@ template<typename Storage_, int Rank_>

Constructor & Destructor Documentation

- -

◆ TensorRef() [1/2]

+ +

◆ TensorRef() [1/5]

-template<typename Storage_, int Rank_>
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
diff --git a/docs/hgemm__global__tile_8h.html b/docs/hgemm__global__tile_8h.html index b62b8c143..0b2e247f5 100644 --- a/docs/hgemm__global__tile_8h.html +++ b/docs/hgemm__global__tile_8h.html @@ -82,10 +82,10 @@ $(function() {

Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits. More...

-
- + - + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_ >::TensorRef CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorRef ()Storageptr = nullptr)
@@ -199,27 +385,65 @@ template<typename Storage_, int Rank_> - -

◆ TensorRef() [2/2]

+ +

◆ TensorRef() [2/5]

-template<typename Storage_, int Rank_>
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + +
- + - + - + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_ >::TensorRef CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorRef (StorageStorage ptr,
Coord< RankIndex ldm 
)
+
+inline
+
+ +
+ + +

◆ TensorRef() [3/5]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + diff --git a/docs/group__tile__traits__concept.html b/docs/group__tile__traits__concept.html index 16e4bd8ae..6c0516967 100644 --- a/docs/group__tile__traits__concept.html +++ b/docs/group__tile__traits__concept.html @@ -77,7 +77,7 @@ $(function() {
+ + + + + + + + + + + @@ -237,363 +461,541 @@ template<typename Storage_, int Rank_> -

Member Function Documentation

- -

◆ advance()

+ +

◆ TensorRef() [4/5]

-template<typename Storage_, int Rank_>
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorRef (Storageptr,
StrideVector const &  stride 
- - -
- + - - - - -
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_ >::advance CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorRef (Coord< Rank > const & b)
-
-inline
-
- -
- - -

◆ at() [1/2]

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_ >::at (Coord< Rank > const & coord) const
-
-inline
-
- -
-
- -

◆ at() [2/2]

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_ >::at (int idx) const
-
-inline
-
- -
-
- -

◆ convert()

- -
-
-
-template<typename Storage_, int Rank_>
-
-template<typename T >
- - - - - -
- - - - - - - -
TensorRef<T, Rank> cutlass::TensorRef< Storage_, Rank_ >::convert ()
-
-inline
-
- -
-
- -

◆ data()

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE Storage* cutlass::TensorRef< Storage_, Rank_ >::data () const
-
-inline
-
- -
-
- -

◆ good()

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE bool cutlass::TensorRef< Storage_, Rank_ >::good () const
-
-inline
-
- -
-
- -

◆ leading_dim()

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE int cutlass::TensorRef< Storage_, Rank_ >::leading_dim () const
-
-inline
-
- -
-
- -

◆ offset()

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE long long cutlass::TensorRef< Storage_, Rank_ >::offset (Coord< Rank > const & coord) const
-
-inline
-
- -
-
- -

◆ operator+()

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_ >::operator+ (Coord< Rank > const & b) const
-
-inline
-
- -
-
- -

◆ operator-()

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_ >::operator- (Coord< Rank > const & b) const
-
-inline
-
- -
-
- -

◆ operator[]() [1/2]

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - - -
Storage& cutlass::TensorRef< Storage_, Rank_ >::operator[] (Coord< Rank > const & coord) const
-
-inline
-
- -
-
- -

◆ operator[]() [2/2]

- -
-
-
-template<typename Storage_, int Rank_>
- - - - - -
- - - - - - - - -
Storage& cutlass::TensorRef< Storage_, Rank_ >::operator[] (int idx) const
-
-inline
-
- -
-
- -

◆ reset()

- -
-
-
-template<typename Storage_, int Rank_>
- - - + + +
- - - - - - + + - - + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorRef< Storage_, Rank_ >::reset (Storageptr = nullptr, Storageptr,
Coord< Rankstride = Coord<Rank>(0) StorageCoord const & stride 
)
+
+inline
+
+

Constructs from a pointer and a stride vector of size kRank. If fastest changing stride is not 1, construction fails and subsequent calls to good() will return false.

+ +
+
+ +

◆ TensorRef() [5/5]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorRef (TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const & ref)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ add_pointer_offset()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::add_pointer_offset (LongIndex delta)
+
+inline
+
+ +
+
+ +

◆ at() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::at (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ at() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::at (LongIndex idx) const
+
+inline
+
+ +
+
+ +

◆ const_ref()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstTensorRef cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::const_ref () const
+
+inline
+
+ +
+
+ +

◆ data()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Storage* cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::data () const
+
+inline
+
+ +
+
+ +

◆ good()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::good () const
+
+inline
+
+ +
+
+ +

◆ leading_dim()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Index cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::leading_dim (int idx = 0) const
+
+inline
+
+ +
+
+ +

◆ map()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE StorageCoord cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::map (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ offset()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE LongIndex cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::offset (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator+()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator+ (TensorCoord const & b) const
+
+inline
+
+ +
+
+ +

◆ operator+=()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator+= (TensorCoord const & b)
+
+inline
+
+ +
+
+ +

◆ operator-()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator- (TensorCoord const & b) const
+
+inline
+
+ +
+
+ +

◆ operator-=()

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator-= (TensorCoord const & b)
+
+inline
+
+ +
+
+ +

◆ operator[]() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator[] (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator[]() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator[] (LongIndex idx) const
+
+inline
+
+ +
+
+ +

◆ reset() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::reset (Storageptr = nullptr)
+
+inline
+
+ +
+
+ +

◆ reset() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + diff --git a/docs/group__tile__store__iterator__concept.html b/docs/group__tile__store__iterator__concept.html index bde540531..992a7ca39 100644 --- a/docs/group__tile__store__iterator__concept.html +++ b/docs/group__tile__store__iterator__concept.html @@ -77,7 +77,7 @@ $(function() {
+ + + + + + + + + + + + @@ -610,19 +1012,19 @@ template<typename Storage_, int Rank_> - -

◆ stride() [1/2]

+ +

◆ stride() [1/2]

-template<typename Storage_, int Rank_>
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE void cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::reset (Storageptr,
StorageCoord const & stride 
diff --git a/docs/group__fragment__iterator__concept.html b/docs/group__fragment__iterator__concept.html index dc89e72e5..e08d36e44 100644 --- a/docs/group__fragment__iterator__concept.html +++ b/docs/group__fragment__iterator__concept.html @@ -91,7 +91,7 @@ Classes diff --git a/docs/group__layout__concept.html b/docs/group__layout__concept.html index 3fe8532c8..66a828819 100644 --- a/docs/group__layout__concept.html +++ b/docs/group__layout__concept.html @@ -100,7 +100,7 @@ Classes diff --git a/docs/group__predicate__iterator__concept.html b/docs/group__predicate__iterator__concept.html index 95c1ef2ef..9c3b71084 100644 --- a/docs/group__predicate__iterator__concept.html +++ b/docs/group__predicate__iterator__concept.html @@ -98,7 +98,7 @@ Classes diff --git a/docs/group__predicate__tile__adapter.html b/docs/group__predicate__tile__adapter.html index a4b809922..8ab28fed9 100644 --- a/docs/group__predicate__tile__adapter.html +++ b/docs/group__predicate__tile__adapter.html @@ -80,7 +80,7 @@ $(function() { diff --git a/docs/group__predicate__vector__concept.html b/docs/group__predicate__vector__concept.html index 5147870e6..cf4fd5b2a 100644 --- a/docs/group__predicate__vector__concept.html +++ b/docs/group__predicate__vector__concept.html @@ -92,7 +92,7 @@ Classes diff --git a/docs/group__tile__load__iterator__concept.html b/docs/group__tile__load__iterator__concept.html index 2bc4b4e34..edc492818 100644 --- a/docs/group__tile__load__iterator__concept.html +++ b/docs/group__tile__load__iterator__concept.html @@ -77,7 +77,7 @@ $(function() {
- + @@ -637,19 +1039,19 @@ template<typename Storage_, int Rank_> - -

◆ stride() [2/2]

+ +

◆ stride() [2/2]

-template<typename Storage_, int Rank_>
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE Coord<Rank> const& cutlass::TensorRef< Storage_, Rank_ >::stride CUTLASS_HOST_DEVICE StorageCoord cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::stride ( ) const
+ + +
- + @@ -666,19 +1068,67 @@ template<typename Storage_, int Rank_>

Member Data Documentation

- -

◆ Rank

+ +

◆ kRank

-template<typename Storage_, int Rank_>
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE int const& cutlass::TensorRef< Storage_, Rank_ >::stride CUTLASS_HOST_DEVICE Index cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::stride ( int  dim)
+ + +
- + + +
int const cutlass::TensorRef< Storage_, Rank_ >::Rank = Rank_int const cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::kRank = Rank_
+
+static
+
+ +
+ + +

◆ kStorageRank

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + +
int const cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::kStorageRank = StorageRank_
+
+static
+
+ +
+
+ +

◆ Rank

+ +
+
+
+template<typename Storage_, int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + @@ -696,7 +1146,7 @@ template<typename Storage_, int Rank_> diff --git a/docs/classcutlass_1_1TensorRef.png b/docs/classcutlass_1_1TensorRef.png new file mode 100644 index 0000000000000000000000000000000000000000..f8caaa61d05cb4460b85cc8dce3844d1f38cc22d GIT binary patch literal 2451 zcmb_edo+~o7N^o>+*VO2bjr{umz5A@NMF)m7=}T}=a$Aa?lco4A7OnXqb7+_#&sk{ zavKrm(;1UnVvcbcV?;F}_uMaYUh1r~*7@VC^T&DCdf#Wi&$IVl``N$!+t2$a!q!q6 zbQB~eCMFHLcEv$VY!3yH5&L%o@0-ap5x}H~uyM2yi9~=nyZDXP9Ptp4b{-~^>GB$f z0zUVLJJ=${0Hy8G+vcbzCU)Q??25S~PJD|0DKh^;<^4}d-t-}n`n^h@cy*?XVkkM2 zQ@MEa*sbd84#htdKr2Kq#YAt##9YNhFPs7-Csq;yHy1wzQ8E{Iwx`7HvPI~{?JCRD zmymdt~SZ^be->4F`xap$N4|A0RECjyrsB`!BLQg&=(7J+LIW>JuV!d@~=jcbK zyve$liEyv@n=iY~j%3qdkt_%pz1|WA2 za*ML9`XfnPO8Td;imo!g)W;gNUe%XnZcKInc1GYnANnQ8L&i7md71%mu#ij0&VfwF z#=E$;SQ)`ohwi$Kt^)pUa+Zk?e=@sdHLgY**>y_p$WZDy3TL()v-jKJ9|5UOeL5qd zz1`JlX=?aTCYlrFl~thu896O;9VtYlDl9x2Do=c}(7|4cL<@A6zcM57v@YzcYARNx zJ8GhO-z7UAc>AmqftMIuHR#ieePE_^JiN399g*<8J^cN%HcC!xi4g|Q>GsHK+w2rB zPw$a(Im@QoUAa@Biau|*>b*h#-DQ5YA*;Lj{?c{Mbbzcg1DzqHkNeR!MAXjXyj#zzR(-jK)e31n+vWLY#N&I(-~Z z+uh>G1+ZnCW9r(Y&~W}j55=(AkX~%8NmqMo@_pGVTeaIiNe;^kerQ^Ad}8w8J}s9b zBLp8s;;!FuL7vN3$&^t-ZUd{5RMwJpL$uLwU>(`Fz$p-Z9qJ_9oc5w);hu(F>(oAE z6V3VDME*?ZRga0NkshbPe#;cbY-7%PtVeZ;feB*juL+98voI|wKT>vyOX)_lKGi=i zeh0NV1}4BmbfPi=Ue+2{rs6i0oZl2y(!QOyl`Mlr{A8Cb<6gZ+m0F}FMNLhoB*9;~ zy$ZDtR?p4!;kR0ZsQ2dcvI81tQ{%jUn^TiNW7dp;*z)-9Eb*(o8t5|wyk2sfiNzKOKj}= zr7E#~UJ3bG$!%CckP!t00FOL^shb0^0CXY zL)8oP8&z0E((dhMRgysvr25shP;?Frk=zS2Jo7TZc$OQt>ds16&M?971|50}y4L>2 zZ2U3Uv*Lwt5jFgz>6{77(8ysbcYe}vL$%9){7Qr)| zB>q!j`sgF!lK+@fmT%|_x9^UuybP=$fXu(Jq*4=9+?Y)xri`F&xmnl^ue+ocsZEzI zAtLx!P1130T4Am2DsLlj3YYztxjlJz#o@0|+^V7YnC_XV28v)5 z^t_EXRJq7+0NIk&Ig6YpO%eNI)T{Wts%3%5En&iv9%t%{0XN(sxKcXY8pmTe*pSOM z%e~2;59`v4`Ex9&pFZ0Xx^j#PdTc*ZiS;Ld(} zW;e+urDM2)RwA)HPtsz6(LC8R8n7E@-8fWRhRyjbst#ow-8R?}Gkq>h7UQH(!2i8t zEPD7RbzmLLU`HNTWtn8Jt(vZxTn%~|RJSb2e~OcJ5AY3PMSKdI;xylh2ui^f4953j zwfirthW&77dGAxO9KqAT*o!|=rl2lsu{LyA;MOy$+%&N0wB1ZBc$F-C-RBGIsrJdj 
zhy19wx)2D5&0{oK(e>LI6*U@Y>lA0L&bWS1d3~`*S!r%Sg$lMNec>xK_|Ix0fz1EL zbtJBu?SLy_&_wAPu*UeHYa1!O8m$;xsV!Moq^1UJX)YyRWLCaI@{6lacL==-`$=Ks zPqz0VfS4@E&+>LQ-z5IGj_@hfAZ5(x$gd`orO(wTjk!Gdxe_#QR;#p-chttk1^9y! OgIU;KDZBi8+&=-q^tC + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+
+ + +
int const cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Rank = kRank
+ + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator Member List
+
+
+ +

This is the complete list of members for cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator, including all inherited members.

+ + + + + + + + + + + + +
ConstIterator(TensorArrayRef const &ref, int idx=0)cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator() constcutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator+(Index idx)cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator++()cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator++(int)cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator+=(Index idx)cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator-(Index idx)cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator--()cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator--(int)cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator-=(Index idx)cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
TensorRef typedefcutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator
+ + + + diff --git a/docs/classcutlass_1_1TensorRefArray_1_1ConstIterator.html b/docs/classcutlass_1_1TensorRefArray_1_1ConstIterator.html new file mode 100644 index 000000000..aa40085cb --- /dev/null +++ b/docs/classcutlass_1_1TensorRefArray_1_1ConstIterator.html @@ -0,0 +1,440 @@ + + + + + + + +Cutlass: cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator Class Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator Class Reference
+
+
+ +

TensorRefIterator over TensorRef objects in TensorRefArray. +

+ +

#include <tensor_ref_collection.h>

+ + + + + +

+Public Types

typedef Base TensorRef
 TensorRef returned by the iterator. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE ConstIterator (TensorArrayRef const &ref, int idx=0)
 Constructs a ConstIterator over the TensorRef objects. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator () const
 Obtains a TensorRef pointed to by this iterator. More...
 
CUTLASS_HOST_DEVICE ConstIteratoroperator++ ()
 Advances to next TensorRef. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator++ (int)
 Advances to next TensorRef. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator+ (Index idx)
 
CUTLASS_HOST_DEVICE ConstIteratoroperator+= (Index idx)
 
CUTLASS_HOST_DEVICE ConstIteratoroperator-- ()
 
CUTLASS_HOST_DEVICE ConstIterator operator-- (int)
 Advances to next TensorRef. More...
 
CUTLASS_HOST_DEVICE ConstIteratoroperator-= (Index idx)
 
CUTLASS_HOST_DEVICE ConstIterator operator- (Index idx)
 
+

Member Typedef Documentation

+ +

◆ TensorRef

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Base cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::TensorRef
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ ConstIterator()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::ConstIterator (TensorArrayRef const & ref,
int idx = 0 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ operator()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorRef* cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator () const
+
+inline
+
+ +
+
+ +

◆ operator+()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator+ (Index idx)
+
+inline
+
+ +
+
+ +

◆ operator++() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator++ ()
+
+inline
+
+ +
+
+ +

◆ operator++() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator++ (int )
+
+inline
+
+ +
+
+ +

◆ operator+=()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator+= (Index idx)
+
+inline
+
+ +
+
+ +

◆ operator-()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator- (Index idx)
+
+inline
+
+ +
+
+ +

◆ operator--() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator-- ()
+
+inline
+
+ +
+
+ +

◆ operator--() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator-- (int )
+
+inline
+
+ +
+
+ +

◆ operator-=()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator-= (Index idx)
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator-members.html b/docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator-members.html new file mode 100644 index 000000000..bb3876187 --- /dev/null +++ b/docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator Member List
+
+
+ +

This is the complete list of members for cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator, including all inherited members.

+ + + + + + + + + + + + + +
ConstIterator(TensorRefBatchStrided const &ref, LongIndex offset=0)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator() constcutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator+(Index idx)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator++()cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator++(int)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator+=(Index idx)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator-(Index idx)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator-(ConstIterator const &it)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator--()cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator--(int)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
operator-=(Index idx)cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorinline
TensorRef typedefcutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator
+ + + + diff --git a/docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator.html b/docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator.html new file mode 100644 index 000000000..c3dbd9dfc --- /dev/null +++ b/docs/classcutlass_1_1TensorRefBatchStrided_1_1ConstIterator.html @@ -0,0 +1,476 @@ + + + + + + + +Cutlass: cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator Class Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator Class Reference
+
+
+ +

Constant iterator over tensors implied by TensorRefBatchStrided. +

+ +

#include <tensor_ref_collection.h>

+ + + + + +

+Public Types

typedef Base TensorRef
 TensorRef returned by the iterator. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE ConstIterator (TensorRefBatchStrided const &ref, LongIndex offset=0)
 Constructs a ConstIterator from a parent TensorRefBatchStrided. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator () const
 Obtains a TensorRef pointed to by the iterator. More...
 
CUTLASS_HOST_DEVICE ConstIteratoroperator++ ()
 Advances the iterator to point to the next tensor. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator++ (int)
 Advances the iterator to point to the next tensor. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator+ (Index idx)
 Returns an iterator advanced by (idx) amount. More...
 
CUTLASS_HOST_DEVICE ConstIteratoroperator+= (Index idx)
 Advances this iterator by (idx) and returns a reference to self. More...
 
CUTLASS_HOST_DEVICE ConstIteratoroperator-- ()
 Moves to the previous tensor. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator-- (int)
 Moves to the previous tensor. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator- (Index idx)
 Returns an iterator moved forward by (idx) amount. More...
 
CUTLASS_HOST_DEVICE ConstIteratoroperator-= (Index idx)
 Moves this iterator by (idx) and returns a reference to self. More...
 
CUTLASS_HOST_DEVICE Stride operator- (ConstIterator const &it)
 Returns the difference in offset between two iterators. More...
 
+

Member Typedef Documentation

+ +

◆ TensorRef

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Base cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::TensorRef
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ ConstIterator()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::ConstIterator (TensorRefBatchStrided const & ref,
LongIndex offset = 0 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ operator()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorRef* cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator () const
+
+inline
+
+ +
+
+ +

◆ operator+()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator+ (Index idx)
+
+inline
+
+ +
+
+ +

◆ operator++() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator++ ()
+
+inline
+
+ +
+
+ +

◆ operator++() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator++ (int )
+
+inline
+
+ +
+
+ +

◆ operator+=()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator+= (Index idx)
+
+inline
+
+ +
+
+ +

◆ operator-() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator- (Index idx)
+
+inline
+
+ +
+
+ +

◆ operator-() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Stride cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator- (ConstIterator const & it)
+
+inline
+
+ +
+
+ +

◆ operator--() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator-- ()
+
+inline
+
+ +
+
+ +

◆ operator--() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator-- (int )
+
+inline
+
+ +
+
+ +

◆ operator-=()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIterator::operator-= (Index idx)
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4-members.html b/docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4-members.html new file mode 100644 index 000000000..8af74ab9b --- /dev/null +++ b/docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4-members.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ > Member List
+
+
+ +

This is the complete list of members for cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >, including all inherited members.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
add_pointer_offset(LongIndex delta)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
at(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
at(LongIndex idx) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
const_ref() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
ConstTensorRef typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
Coord_t typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
data() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
good() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
Index typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
kRankcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >static
kStorageRankcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >static
leading_dim(int idx=0) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
LongIndex typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
map(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
MapFunc typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
offset(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
operator+(TensorCoord const &b) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
operator+=(TensorCoord const &b)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
operator-(TensorCoord const &b) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
operator-=(TensorCoord const &b)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
operator[](TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
operator[](LongIndex idx) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
Rankcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >static
reset(Storage *ptr=nullptr)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
reset(Storage *ptr, StorageCoord const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
Storage typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
StorageCoord typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
stride() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
stride(int dim) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
TensorCoord typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
TensorRef(Storage *ptr=nullptr)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
TensorRef(Storage *ptr, StrideVector const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
TensorRef(Storage *ptr, StorageCoord const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
TensorRef(TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const &ref)cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >inline
+ + + + diff --git a/docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4.html b/docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4.html new file mode 100644 index 000000000..2dfd10c99 --- /dev/null +++ b/docs/classcutlass_1_1TensorRef_3_01Storage___00_01Rank___00_01MapFunc___00_011_00_01Index___00_01LongIndex___01_4.html @@ -0,0 +1,1092 @@ + + + + + + + +Cutlass: cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ > Class Template Reference
+
+
+ +

Specialization for rank=1 case with no internal StrideVector. +

+ +

#include <tensor_ref.h>

+ + + + +

+Classes

struct  StrideVector
 
+ + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Types

typedef Storage_ Storage
 Data type of individual access. More...
 
typedef MapFunc_ MapFunc
 Mapping function from logical coordinate to internal n-D array. More...
 
typedef Index_ Index
 Index type. More...
 
typedef LongIndex_ LongIndex
 Typically, strides in memory can be very large. More...
 
typedef Coord< kRankTensorCoord
 Coordinate in logical tensor space. More...
 
typedef Coord< kStorageRankStorageCoord
 Coordinate in storage n-D array. More...
 
typedef TensorRef< typename platform::remove_const< Storage >::type const, Rank_, MapFunc_, kStorageRank, Index_, LongIndex_ > ConstTensorRef
 Tensor reference to of constant value. More...
 
typedef TensorCoord Coord_t
 Coordinate in logical tensor space. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE TensorRef (Storage *ptr=nullptr)
 Helper for 1-D memory. All higher ranks are projected onto the fastest changing rank. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, StrideVector const &stride)
 Constructs from a single pointer and stride vector. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, StorageCoord const &stride)
 
CUTLASS_HOST_DEVICE TensorRef (TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const &ref)
 Enables conversion from TensorRef of non-const type. More...
 
CUTLASS_HOST_DEVICE ConstTensorRef const_ref () const
 Returns a reference to constant-valued tensor. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr)
 Updates only the pointer. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr, StorageCoord const &stride)
 Updates the pointer, stride, and location within a TensorRef. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storagedata () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE StorageCoord stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE Index stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE Index leading_dim (int idx=0) const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE StorageCoord map (TensorCoord const &coord) const
 Maps a logical coordinate to an n-D array in memory. More...
 
CUTLASS_HOST_DEVICE LongIndex offset (TensorCoord const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storageat (TensorCoord const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageat (LongIndex idx) const
 Returns a reference to the element at a given linear index. More...
 
CUTLASS_HOST_DEVICE Storageoperator[] (TensorCoord const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageoperator[] (LongIndex idx) const
 Returns a reference to the element at a given linear index. More...
 
CUTLASS_HOST_DEVICE TensorRefadd_pointer_offset (LongIndex delta)
 Adds an offset to each pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (TensorCoord const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator+= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (TensorCoord const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator-= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
+ + + + + + + + + + +

+Static Public Attributes

static int const kRank = Rank_
 Logical rank of tensor index space. More...
 
static int const kStorageRank = 1
 Rank of internal storage. More...
 
static int const Rank = kRank
 Logical rank of tensor index space. More...
 
+

Member Typedef Documentation

+ +

◆ ConstTensorRef

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef TensorRef< typename platform::remove_const<Storage>::type const, Rank_, MapFunc_, kStorageRank, Index_, LongIndex_> cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::ConstTensorRef
+
+ +
+
+ +

◆ Coord_t

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef TensorCoord cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::Coord_t
+
+ +
+
+ +

◆ Index

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef Index_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::Index
+
+ +
+
+ +

◆ LongIndex

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef LongIndex_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::LongIndex
+
+ +
+
+ +

◆ MapFunc

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef MapFunc_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::MapFunc
+
+ +
+
+ +

◆ Storage

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef Storage_ cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::Storage
+
+ +
+
+ +

◆ StorageCoord

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef Coord<kStorageRank> cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::StorageCoord
+
+ +
+
+ +

◆ TensorCoord

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + +
typedef Coord<kRank> cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::TensorCoord
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ TensorRef() [1/4]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::TensorRef (Storageptr = nullptr)
+
+inline
+
+ +
+
+ +

◆ TensorRef() [2/4]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::TensorRef (Storageptr,
StrideVector const & stride 
)
+
+inline
+
+ +
+
+ +

◆ TensorRef() [3/4]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::TensorRef (Storageptr,
StorageCoord const & stride 
)
+
+inline
+
+

Constructs from a pointer and a stride vector of size kRank. If fastest changing stride is not 1, construction fails and subsequent calls to good() will return false.

+ +
+
+ +

◆ TensorRef() [4/4]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::TensorRef (TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const & ref)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ add_pointer_offset()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::add_pointer_offset (LongIndex delta)
+
+inline
+
+ +
+
+ +

◆ at() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::at (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ at() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::at (LongIndex idx) const
+
+inline
+
+ +
+
+ +

◆ const_ref()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstTensorRef cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::const_ref () const
+
+inline
+
+ +
+
+ +

◆ data()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Storage* cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::data () const
+
+inline
+
+ +
+
+ +

◆ good()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::good () const
+
+inline
+
+ +
+
+ +

◆ leading_dim()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Index cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::leading_dim (int idx = 0) const
+
+inline
+
+ +
+
+ +

◆ map()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE StorageCoord cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::map (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ offset()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE LongIndex cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::offset (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator+()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::operator+ (TensorCoord const & b) const
+
+inline
+
+ +
+
+ +

◆ operator+=()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::operator+= (TensorCoord const & b)
+
+inline
+
+ +
+
+ +

◆ operator-()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::operator- (TensorCoord const & b) const
+
+inline
+
+ +
+
+ +

◆ operator-=()

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::operator-= (TensorCoord const & b)
+
+inline
+
+ +
+
+ +

◆ operator[]() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::operator[] (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator[]() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::operator[] (LongIndex idx) const
+
+inline
+
+ +
+
+ +

◆ reset() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::reset (Storageptr = nullptr)
+
+inline
+
+ +
+
+ +

◆ reset() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::reset (Storageptr,
StorageCoord const & stride 
)
+
+inline
+
+ +
+
+ +

◆ stride() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE StorageCoord cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::stride () const
+
+inline
+
+ +
+
+ +

◆ stride() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Index cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::stride (int dim) const
+
+inline
+
+ +
+
+

Member Data Documentation

+ +

◆ kRank

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + +
int const cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::kRank = Rank_
+
+static
+
+ +
+
+ +

◆ kStorageRank

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + +
int const cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::kStorageRank = 1
+
+static
+
+ +
+
+ +

◆ Rank

+ +
+
+
+template<typename Storage_ , int Rank_, typename MapFunc_ , typename Index_ , typename LongIndex_ >
+ + + + + +
+ + + + +
int const cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::Rank = kRank
+
+static
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/classcutlass_1_1TensorView-members.html b/docs/classcutlass_1_1TensorView-members.html index e9401f9cc..9f5c32535 100644 --- a/docs/classcutlass_1_1TensorView-members.html +++ b/docs/classcutlass_1_1TensorView-members.html @@ -73,51 +73,70 @@ $(function() {
-
cutlass::TensorView< T > Member List
+
cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > Member List
-

This is the complete list of members for cutlass::TensorView< T >, including all inherited members.

+

This is the complete list of members for cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >, including all inherited members.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
advance(Coord< Rank > const &b)cutlass::TensorRef< T, 4 >inline
at(Coord_t const &coord) constcutlass::TensorView< T >inline
at(Offset_t idx) constcutlass::TensorView< T >inline
Base typedefcutlass::TensorView< T >
const_ref()cutlass::TensorView< T >inline
ConstTensorRef_t typedefcutlass::TensorView< T >
contains(Coord_t const &coord) constcutlass::TensorView< T >inline
convert()cutlass::TensorRef< T, 4 >inline
Coord_t typedefcutlass::TensorView< T >
data() constcutlass::TensorView< T >inline
good() constcutlass::TensorView< T >inline
leading_dim() constcutlass::TensorRef< T, 4 >inline
offset(Coord_t const &coord) constcutlass::TensorView< T >inline
Offset_t typedefcutlass::TensorView< T >
operator+(Coord< Rank > const &b) constcutlass::TensorRef< T, 4 >inline
operator-(Coord< Rank > const &b) constcutlass::TensorRef< T, 4 >inline
operator=(TensorView const &_tensor)cutlass::TensorView< T >inline
operator[](Coord< Rank > const &coord) constcutlass::TensorView< T >inline
TensorRef< T, 4 >::operator[](int idx) constcutlass::TensorRef< T, 4 >inline
Rankcutlass::TensorView< T >static
ref()cutlass::TensorView< T >inline
ref() constcutlass::TensorView< T >inline
reset(TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())cutlass::TensorView< T >inline
TensorRef< T, 4 >::reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))cutlass::TensorRef< T, 4 >inline
size() constcutlass::TensorView< T >inline
size(int dim) constcutlass::TensorView< T >inline
Storage typedefcutlass::TensorRef< T, 4 >
stride() constcutlass::TensorView< T >inline
stride(int dim) constcutlass::TensorView< T >inline
subview(Coord_t const &location, Coord_t size) constcutlass::TensorView< T >inline
TensorRef()cutlass::TensorRef< T, 4 >inline
TensorRef(Storage *ptr, Coord< Rank > stride)cutlass::TensorRef< T, 4 >inline
TensorRef_t typedefcutlass::TensorView< T >
TensorView()cutlass::TensorView< T >inline
TensorView(TensorRef_t const &_ref, Coord_t const &_size)cutlass::TensorView< T >inline
add_pointer_offset(LongIndex delta)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
at(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
at(LongIndex idx) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Base typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
capacity() constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
const_ref() constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
ConstTensorRef typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
ConstTensorRef_t typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
ConstTensorView typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
contains(TensorCoord const &coord) constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Coord_t typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
data() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
good() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Index typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
kRankcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >static
kStorageRankcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >static
leading_dim(int idx=0) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
LongIndex typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
map(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
MapFunc typedefcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
offset(TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Offset_t typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
operator+(TensorCoord const &b) constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator+=(TensorCoord const &b)cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator-(TensorCoord const &b) constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator-=(TensorCoord const &b)cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator=(TensorView const &_tensor)cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator[](TensorCoord const &coord) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
operator[](LongIndex idx) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Rankcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >static
ref() constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
reset(Base const &_ref=Base(), TensorCoord const &_size=TensorCoord())cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
cutlass::TensorRef::reset(Storage *ptr=nullptr)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
cutlass::TensorRef::reset(Storage *ptr, StorageCoord const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
size() constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
size(int dim) constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
Storage typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
StorageCoord typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
stride() constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
stride(int dim) constcutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
StrideVector typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
subview(TensorCoord const &location, TensorCoord size) constcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorCoord typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
TensorRef typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
cutlass::TensorRef::TensorRef(Storage *ptr=nullptr)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
cutlass::TensorRef::TensorRef(Storage *ptr, Index ldm)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
cutlass::TensorRef::TensorRef(Storage *ptr, StrideVector const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
cutlass::TensorRef::TensorRef(Storage *ptr, StorageCoord const &stride)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
cutlass::TensorRef::TensorRef(TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const &ref)cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorRef_t typedefcutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
TensorView()cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorView(Base const &_ref, TensorCoord const &_size)cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorView(Storage *ptr, StrideVector const &stride, TensorCoord const &size)cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
TensorView(Storage *ptr, StorageCoord const &stride, TensorCoord const &size)cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >inline
diff --git a/docs/classcutlass_1_1TensorView.html b/docs/classcutlass_1_1TensorView.html index 7dba23228..276d1077d 100644 --- a/docs/classcutlass_1_1TensorView.html +++ b/docs/classcutlass_1_1TensorView.html @@ -5,7 +5,7 @@ -Cutlass: cutlass::TensorView< T > Class Template Reference +Cutlass: cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > Class Template Reference @@ -78,242 +78,438 @@ $(function() { Static Public Attributes | List of all members
-
cutlass::TensorView< T > Class Template Reference
+
cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > Class Template Reference
-

Host-side reference implementation of tensor operations. +

Defines a view into a logical tensor.

#include <tensor_view.h>

-Inheritance diagram for cutlass::TensorView< T >:
+Inheritance diagram for cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >:
- - -cutlass::TensorRef< T, 4 > + + +cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
- - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Public Types

typedef TensorRef< T, 4 > Base
 Reference and stride. More...
 
typedef Base TensorRef_t
 Reference and stride. More...
 
typedef TensorRef< T const, 4 > ConstTensorRef_t
 Reference to constant type. More...
 
typedef int Offset_t
 Type used to compute the offset of an element to the base of a tensor. More...
 
typedef Coord< RankCoord_t
 Coordinate into tensor. More...
 
- Public Types inherited from cutlass::TensorRef< T, 4 >
typedef T Storage
 Data type of individual access. More...
 
typedef TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > Base
 Base tensor reference. More...
 
typedef TensorRef< typename platform::remove_const< Storage_ >::type const, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > ConstTensorRef
 Tensor reference to of constant value. More...
 
typedef Base TensorRef
 Base tensor reference. More...
 
typedef Base::Storage Storage
 Storage type. More...
 
typedef Base::Index Index
 Index type. More...
 
typedef TensorRef::TensorCoord TensorCoord
 Coordinate in logical tensor space. More...
 
typedef TensorRef::StorageCoord StorageCoord
 Coordinate in storage n-D array. More...
 
typedef TensorRef::StrideVector StrideVector
 
typedef TensorView< typename platform::remove_const< Storage >::type const, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > ConstTensorView
 TensorView of constant value. More...
 
typedef TensorCoord Coord_t
 Coordinate in logical tensor space. More...
 
typedef Base::LongIndex Offset_t
 Type used to compute the offset of an element to the base of a tensor. More...
 
typedef TensorRef TensorRef_t
 Base class. More...
 
typedef TensorRef::ConstTensorRef ConstTensorRef_t
 TensorRef to const-valued type. More...
 
- Public Types inherited from cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
typedef Storage_ Storage
 Data type of individual access. More...
 
typedef MapFunc_ MapFunc
 Mapping function from logical coordinate to internal n-D array. More...
 
typedef Index_ Index
 Index type. More...
 
typedef LongIndex_ LongIndex
 Typically, strides in memory can be very large. More...
 
typedef Coord< kRankTensorCoord
 Coordinate in logical tensor space. More...
 
typedef Coord< kStorageRankStorageCoord
 Coordinate in storage n-D array. More...
 
typedef Coord< kStorageRank - 1 > StrideVector
 
typedef TensorRef< typename platform::remove_const< Storage >::type const, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > ConstTensorRef
 Tensor reference to of constant value. More...
 
typedef TensorCoord Coord_t
 Coordinate in logical tensor space. More...
 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Public Member Functions

CUTLASS_HOST_DEVICE TensorView ()
 Default constructor. More...
 
CUTLASS_HOST_DEVICE TensorView (TensorRef_t const &_ref, Coord_t const &_size)
 Constructs a Tensor_view from a TensorRef and size. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the Tensor_view is bound to some memory. More...
 
CUTLASS_HOST_DEVICE T * data () const
 Returns a pointer to data. More...
 
CUTLASS_HOST_DEVICE void reset (TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())
 Updates the reference and size of a Tensor_view object. More...
 
CUTLASS_HOST_DEVICE TensorRef_tref ()
 Accesses the tensor reference pointing to data. More...
 
CUTLASS_HOST_DEVICE ConstTensorRef_t const_ref ()
 
CUTLASS_HOST_DEVICE TensorRef_t const & ref () const
 Accesses the tensor reference pointing to data. More...
 
CUTLASS_HOST_DEVICE Coord_t const & size () const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE int size (int dim) const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE Coord_t const & stride () const
 Accesses the stride. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Accesses the stride. More...
 
CUTLASS_HOST_DEVICE TensorViewoperator= (TensorView const &_tensor)
 Assigns the Tensor_view. More...
 
CUTLASS_HOST_DEVICE Offset_t offset (Coord_t const &coord) const
 Returns the index of an element. More...
 
CUTLASS_HOST_DEVICE bool contains (Coord_t const &coord) const
 Determines whether a location is within a tensor. More...
 
CUTLASS_HOST_DEVICE T & at (Coord_t const &coord) const
 Element-wise accessor. More...
 
T & operator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE T & at (Offset_t idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorView< T > subview (Coord_t const &location, Coord_t size) const
 Returns a Tensor_view given location and size quantities. More...
 
- Public Member Functions inherited from cutlass::TensorRef< T, 4 >
CUTLASS_HOST_DEVICE TensorRef ()
 Default ctor. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Coord< Rank > stride)
 Constructs from a pointer, size, and stride. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
 Updates the pointer, stride, and location within a TensorRef. More...
 
TensorRef< T, Rankconvert ()
 Conversion function. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storagedata () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE Coord< Rank > const & stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE int leading_dim () const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE long long offset (Coord< Rank > const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storageat (Coord< Rank > const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageat (int idx) const
 Returns a reference to the element at a given Coord. More...
 
Storageoperator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
Storageoperator[] (int idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorRefadvance (Coord< Rank > const &b)
 Adds an offset to the pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorView ()
 Default constructor. More...
 
CUTLASS_HOST_DEVICE TensorView (Base const &_ref, TensorCoord const &_size)
 Constructs a TensorView from a TensorRef and size. More...
 
CUTLASS_HOST_DEVICE TensorView (Storage *ptr, StrideVector const &stride, TensorCoord const &size)
 Constructs a TensorView from a pointer, a stride vector, and size. More...
 
CUTLASS_HOST_DEVICE TensorView (Storage *ptr, StorageCoord const &stride, TensorCoord const &size)
 Constructs a TensorView from a pointer, a stride vector, and size. More...
 
CUTLASS_HOST_DEVICE void reset (Base const &_ref=Base(), TensorCoord const &_size=TensorCoord())
 Updates the reference and size of a Tensor_view object. More...
 
CUTLASS_HOST_DEVICE TensorCoord const & size () const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE Index size (int dim) const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE TensorViewoperator= (TensorView const &_tensor)
 Assigns the Tensor_view. More...
 
CUTLASS_HOST_DEVICE bool contains (TensorCoord const &coord) const
 Determines whether a location is within a tensor. More...
 
CUTLASS_HOST_DEVICE TensorRef ref () const
 Returns a TensorRef pointing to the first element of the tensor. More...
 
CUTLASS_HOST_DEVICE ConstTensorRef const_ref () const
 Returns a TensorRef pointing to the first element of the tensor. More...
 
CUTLASS_HOST_DEVICE TensorView subview (TensorCoord const &location, TensorCoord size) const
 Returns a Tensor_view given location and size quantities. More...
 
CUTLASS_HOST_DEVICE size_t capacity () const
 Returns the number of scalar elements needed to store tensor. More...
 
CUTLASS_HOST_DEVICE TensorView operator+ (TensorCoord const &b) const
 Returns a TensorView offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorViewoperator+= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorView operator- (TensorCoord const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorViewoperator-= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
- Public Member Functions inherited from cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr=nullptr)
 Helper for 1-D memory. All higher ranks are projected onto the fastest changing rank. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Index ldm)
 Helper to construct from a pointer and single stride element for 2-D pitch linear memory. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, StrideVector const &stride)
 Constructs from a single pointer and stride vector. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, StorageCoord const &stride)
 
CUTLASS_HOST_DEVICE TensorRef (TensorRef< typename platform::remove_const< Storage >::type, kRank, MapFunc, kStorageRank, Index, LongIndex > const &ref)
 Enables conversion from TensorRef of non-const type. More...
 
CUTLASS_HOST_DEVICE ConstTensorRef const_ref () const
 Returns a reference to constant-valued tensor. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr)
 Updates only the pointer. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr, StorageCoord const &stride)
 Updates the pointer, stride, and location within a TensorRef. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storagedata () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE StorageCoord stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE Index stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE Index leading_dim (int idx=0) const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE StorageCoord map (TensorCoord const &coord) const
 Maps a logical coordinate to an n-D array in memory. More...
 
CUTLASS_HOST_DEVICE LongIndex offset (TensorCoord const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storageat (TensorCoord const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageat (LongIndex idx) const
 Returns a reference to the element at a given linear index. More...
 
CUTLASS_HOST_DEVICE Storageoperator[] (TensorCoord const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageoperator[] (LongIndex idx) const
 Returns a reference to the element at a given linear index. More...
 
CUTLASS_HOST_DEVICE TensorRefadd_pointer_offset (LongIndex delta)
 Adds an offset to each pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (TensorCoord const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator+= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (TensorCoord const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRefoperator-= (TensorCoord const &b)
 Returns a TensorRef offset by a given amount. More...
 
- - - - - - - + + + + + + + + + + + + +

Static Public Attributes

static int const Rank = TensorRef_t::Rank
 Rank of tensor. More...
 
- Static Public Attributes inherited from cutlass::TensorRef< T, 4 >
static int const Rank
 Rank of tensor. More...
 
static int const Rank = Base::kRank
 Logical rank of tensor index space. More...
 
- Static Public Attributes inherited from cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
static int const kRank = Rank_
 Logical rank of tensor index space. More...
 
static int const kStorageRank = StorageRank_
 Rank of internal storage. More...
 
static int const Rank = kRank
 Logical rank of tensor index space. More...
 

Member Typedef Documentation

- -

◆ Base

+ +

◆ Base

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
- +
typedef TensorRef<T, 4> cutlass::TensorView< T >::Basetypedef TensorRef<Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_> cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Base
- -

◆ ConstTensorRef_t

+ +

◆ ConstTensorRef

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
- +
typedef TensorRef<T const, 4> cutlass::TensorView< T >::ConstTensorRef_ttypedef TensorRef< typename platform::remove_const<Storage_>::type const, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_> cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstTensorRef
- -

◆ Coord_t

+ +

◆ ConstTensorRef_t

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
- +
typedef Coord<Rank> cutlass::TensorView< T >::Coord_ttypedef TensorRef::ConstTensorRef cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstTensorRef_t
- -

◆ Offset_t

+ +

◆ ConstTensorView

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
- +
typedef int cutlass::TensorView< T >::Offset_ttypedef TensorView< typename platform::remove_const<Storage>::type const, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_> cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstTensorView
- -

◆ TensorRef_t

+ +

◆ Coord_t

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
- + + +
typedef Base cutlass::TensorView< T >::TensorRef_ttypedef TensorCoord cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Coord_t
+
+ +
+ + +

◆ Index

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Base::Index cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Index
+
+ +
+
+ +

◆ Offset_t

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Base::LongIndex cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Offset_t
+
+ +
+
+ +

◆ Storage

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Base::Storage cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Storage
+
+ +
+
+ +

◆ StorageCoord

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef TensorRef::StorageCoord cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::StorageCoord
+
+ +
+
+ +

◆ StrideVector

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef TensorRef::StrideVector cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::StrideVector
+
+

Stride vector in storage coordinate space Least significant stride is = 1 and not stored

+ +
+
+ +

◆ TensorCoord

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef TensorRef::TensorCoord cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorCoord
+
+ +
+
+ +

◆ TensorRef

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + +
typedef Base cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorRef
+
+ +
+
+ +

◆ TensorRef_t

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + +
typedef TensorRef cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorRef_t
@@ -321,19 +517,19 @@ template<typename T>

Constructor & Destructor Documentation

- -

◆ TensorView() [1/2]

+ +

◆ TensorView() [1/4]

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + + + + + + + + + + + @@ -109,6 +124,9 @@ Files + + + @@ -124,9 +142,6 @@ Files - - - @@ -145,12 +160,21 @@ Files + + + + + + + + + @@ -170,7 +194,7 @@ Files diff --git a/docs/dir_c5917a9a879e9a6c73eaf5237444ab84.html b/docs/dir_c5917a9a879e9a6c73eaf5237444ab84.html index a66eb22fa..9011cf40c 100644 --- a/docs/dir_c5917a9a879e9a6c73eaf5237444ab84.html +++ b/docs/dir_c5917a9a879e9a6c73eaf5237444ab84.html @@ -79,12 +79,16 @@ $(function() {
- + @@ -348,27 +544,27 @@ template<typename T> - -

◆ TensorView() [2/2]

+ +

◆ TensorView() [2/4]

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE cutlass::TensorView< T >::TensorView CUTLASS_HOST_DEVICE cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorView ( )
diff --git a/docs/debug_8h_source.html b/docs/debug_8h_source.html index 881b4e3f0..c404b4110 100644 --- a/docs/debug_8h_source.html +++ b/docs/debug_8h_source.html @@ -81,7 +81,7 @@ $(function() { diff --git a/docs/dgemm__traits_8h.html b/docs/dgemm__traits_8h.html index eebc2f364..ac6d33b0c 100644 --- a/docs/dgemm__traits_8h.html +++ b/docs/dgemm__traits_8h.html @@ -82,21 +82,21 @@ $(function() {

Defines structural traits of double-precision GEMM. More...

-
- + - + - + @@ -386,332 +582,34 @@ template<typename T> -

Member Function Documentation

- -

◆ at() [1/2]

+ +

◆ TensorView() [3/4]

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE cutlass::TensorView< T >::TensorView CUTLASS_HOST_DEVICE cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorView (TensorRef_t const & Base const &  _ref,
Coord_t const & TensorCoord const &  _size 
- - -
- + - - - - -
CUTLASS_HOST_DEVICE T& cutlass::TensorView< T >::at CUTLASS_HOST_DEVICE cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorView (Coord_t const & coord) const
-
-inline
-
- -
- - -

◆ at() [2/2]

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE T& cutlass::TensorView< T >::at (Offset_t idx) const
-
-inline
-
- -
-
- -

◆ const_ref()

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE ConstTensorRef_t cutlass::TensorView< T >::const_ref ()
-
-inline
-
- -
-
- -

◆ contains()

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE bool cutlass::TensorView< T >::contains (Coord_t const & coord) const
-
-inline
-
- -
-
- -

◆ data()

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE T* cutlass::TensorView< T >::data () const
-
-inline
-
- -
-
- -

◆ good()

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE bool cutlass::TensorView< T >::good () const
-
-inline
-
- -
-
- -

◆ offset()

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE Offset_t cutlass::TensorView< T >::offset (Coord_t const & coord) const
-
-inline
-
- -
-
- -

◆ operator=()

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE TensorView& cutlass::TensorView< T >::operator= (TensorView< T > const & _tensor)
-
-inline
-
- -
-
- -

◆ operator[]()

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - - -
T& cutlass::TensorView< T >::operator[] (Coord< Rank > const & coord) const
-
-inline
-
- -
-
- -

◆ ref() [1/2]

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE TensorRef_t& cutlass::TensorView< T >::ref ()
-
-inline
-
- -
-
- -

◆ ref() [2/2]

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - -
CUTLASS_HOST_DEVICE TensorRef_t const& cutlass::TensorView< T >::ref () const
-
-inline
-
- -
-
- -

◆ reset()

- -
-
-
-template<typename T>
- - -
- - - - - - + + - - + + + + + + + + @@ -728,19 +626,64 @@ template<typename T> - -

◆ size() [1/2]

+ +

◆ TensorView() [4/4]

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE void cutlass::TensorView< T >::reset (TensorRef_t const & _ref = TensorRef_t(0), Storageptr,
Coord_t const & _size = Coord_t() StrideVector const & stride,
TensorCoord const & size 
+ + +
- + + + + + + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE Coord_t const& cutlass::TensorView< T >::size CUTLASS_HOST_DEVICE cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::TensorView (Storageptr,
StorageCoord const & stride,
TensorCoord const & size 
)
+
+inline
+
+ +
+ +

Member Function Documentation

+ +

◆ capacity()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + +
+ + + @@ -755,19 +698,306 @@ template<typename T> - -

◆ size() [2/2]

+ +

◆ const_ref()

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE size_t cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::capacity ( ) const
+ + +
- + + + + + +
CUTLASS_HOST_DEVICE int cutlass::TensorView< T >::size CUTLASS_HOST_DEVICE ConstTensorRef cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::const_ref () const
+
+inline
+
+ +
+ + +

◆ contains()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::contains (TensorCoord const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator+()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorView cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator+ (TensorCoord const & b) const
+
+inline
+
+ +
+
+ +

◆ operator+=()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorView& cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator+= (TensorCoord const & b)
+
+inline
+
+ +
+
+ +

◆ operator-()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorView cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator- (TensorCoord const & b) const
+
+inline
+
+ +
+
+ +

◆ operator-=()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorView& cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator-= (TensorCoord const & b)
+
+inline
+
+ +
+
+ +

◆ operator=()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorView& cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::operator= (TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > const & _tensor)
+
+inline
+
+ +
+
+ +

◆ ref()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ref () const
+
+inline
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::reset (Base const & _ref = Base(),
TensorCoord const & _size = TensorCoord() 
)
+
+inline
+
+ +
+
+ +

◆ size() [1/2]

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorCoord const& cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::size () const
+
+inline
+
+ +
+
+ +

◆ size() [2/2]

+ +
+
+
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
+ + +
+ + + @@ -783,82 +1013,27 @@ template<typename T> - -

◆ stride() [1/2]

+ +

◆ subview()

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE Index cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::size ( int  dim)
- - -
- + - - - -
CUTLASS_HOST_DEVICE Coord_t const& cutlass::TensorView< T >::stride CUTLASS_HOST_DEVICE TensorView cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::subview () const
-
-inline
-
- -
- - -

◆ stride() [2/2]

- -
-
-
-template<typename T>
- - - - - -
- - - - - - - - -
CUTLASS_HOST_DEVICE int const& cutlass::TensorView< T >::stride (int dim) const
-
-inline
-
- -
-
- -

◆ subview()

- -
-
-
-template<typename T>
- - -
- - - - - + - + @@ -877,19 +1052,19 @@ template<typename T>

Member Data Documentation

- -

◆ Rank

+ +

◆ Rank

-template<typename T>
+template<typename Storage_ , int Rank_ = 4, typename MapFunc_ = IdentityTensorMapFunc<Rank_>, int StorageRank_ = MapFunc_::kStorageRank, typename Index_ = int, typename LongIndex_ = long long>
CUTLASS_HOST_DEVICE TensorView<T> cutlass::TensorView< T >::subview (Coord_t const & TensorCoord const &  location,
Coord_t TensorCoord  size 
@@ -907,7 +1082,7 @@ template<typename T> diff --git a/docs/classcutlass_1_1TensorView.png b/docs/classcutlass_1_1TensorView.png index 40500e8a3a854639f0a6a25982cb66d99a343817..46861ac917aafe499ecdf03bd95fbfbe0af5725f 100644 GIT binary patch literal 1605 zcmZXUdpOg39LIkfvSm(}=_0wDHFlgFa%ZC=x1og`w_N5DvXsiC6B2u5WU5EBEWp^gQQzs`GuG@Av!uT)v;b-mfp&31=(6b>CJ10OYZD zRxSVlhDx*(L`rhfN_G!RuDwoe#vIpJAjU8ZIN^tQpgS?Pq0cyM$80w z3U^5!7n-hc;0d>3;d=FY7?gZZ0$|(E6+uRc0;0`DB8Anr;od`_e3fZ zgP6Y!!EQCF?U7pySwv-yzewO35{<6wexMAOo9@>t3`%ihnzSgE94}N9yk$8F`aHOo z-hbvtPy0X{4Pvr_Zp4yfJJeZiCO;^o68d^_^2I``LasCe8hu=V&PJxB2+gA~@RL z2esF~0cdO@_B&7%?|Ekg@K2L*5bC?Qc7*1reNAyyyFZ;vg1;ndq2k7IAFxS^WUW6%Qy z$dTZq2r7_ihXRmACS&aJv|}9`QW2@O3n}R0G;ufdmEhXM#@hN4;Y9wrFg51k zC;#?O89~$~%z2cy*COP}+PGl=fO+kW`DNYG6ZNMkd%A|BEK?o0IUpCCTZ@av zP@v)!+^RZ{g!k>gz&`8!MsGTq3im1^kk>BOJzEA3_@6KD?V*o7c!%Jrx2VcQ1X>95 z(FLr$xaKsc21W#h<-s#;>8(6Im;dyVC2Y3QKSy5fu)E0)ffCuMgX#W(&5FabQy?qg}2*e1I->6g(QfX{@h& kb_n-V=`AbG(io!+sToy*IYd+=FZuHUtToQ6$`YUOFIczx`2YX_ delta 645 zcmX@gvx!x)Gr-TCmrII^fq{Y7)59eQNG}5701jp#xwb6MdZMCLJ(HuSi(^Oy~60G%{^_8$>hx0QzUbI<8}*`%yia8DwU!glb&4V zO{(JcoTd5H-ZLue%d6#Q7M)3|YpoFWo@BLm&aCI9=Q3vI?N)rooBG#g|GZB!)o+`w zTkefkJgpct$MYV$^GwB)eZut{KWx4I#C`LutH!r1mP@r;91eRg*c;q_W1^Y!$r($` ztd3k~T0^0%nGUF0xDWj#~iq&-zia>omEbQ|hdvmdy3Oa9}>8}kb5 zkElO<*R!9&&`RpSdIy#dS^|uFxLg?4GYKA8jhp+=`^c1_ThS^Fb45EC;#id$!oPR; z|8Ck>Zzg@f>t?d-Yu_FDg6FR?idhLVPTTeMrNC#qO|O2hcyl)H_tn#L?&;otW#<3) z#@dJL1y3I-oBb!_TG>xSl?{spBKH)(7rXf8rp?za%1PoIB-6gW5^&$nw(-{Qts257 zws$-gxi7kF&Zpf+wu;8BT(@t1p=a2AdwJzsOWPdQ-fF(B8=VxrY2N4jO%Vse(hFl> zWIOv@eEt95C!JaE&d%7%&0Vs3vFCzX?ii~okNq3te}CURo$o*$l5by?D=eFG>Dn*m Y*ZfM>QKdJefGLZ?)78&qol`;+0NEli^Z)<= diff --git a/docs/classcutlass_1_1ZipTileIterator-members.html b/docs/classcutlass_1_1ZipTileIterator-members.html new file mode 100644 index 000000000..6de74a494 --- /dev/null +++ b/docs/classcutlass_1_1ZipTileIterator-members.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+
- +
int const cutlass::TensorView< T >::Rank = TensorRef_t::Rankint const cutlass::TensorView< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::Rank = Base::kRank
+ + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+ + + + + + + + + +
+
+ + +
+ +
+ + + +
+
+
cutlass::ZipTileIterator< First_, Second_ > Member List
+
+
+ +

This is the complete list of members for cutlass::ZipTileIterator< First_, Second_ >, including all inherited members.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
add_pointer_offset(Index offset)cutlass::ZipTileIterator< First_, Second_ >inline
decrement(int count=1)cutlass::ZipTileIterator< First_, Second_ >inline
First typedefcutlass::ZipTileIterator< First_, Second_ >
firstcutlass::ZipTileIterator< First_, Second_ >
Fragment typedefcutlass::ZipTileIterator< First_, Second_ >
increment(int count=1)cutlass::ZipTileIterator< First_, Second_ >inline
Index typedefcutlass::ZipTileIterator< First_, Second_ >
initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::ZipTileIterator< First_, Second_ >inline
initialize_predicates(PredicateIterator predicate_it, PredicateFunctor const &functor, Coord< 3 > const &block_offset)cutlass::ZipTileIterator< First_, Second_ >inline
load(Fragment &fragment) constcutlass::ZipTileIterator< First_, Second_ >inline
load(Fragment &fragment, Coord< 4 > const &offset) constcutlass::ZipTileIterator< First_, Second_ >inline
load(Fragment &fragment, PredicateIterator pred_it) constcutlass::ZipTileIterator< First_, Second_ >inline
load_post_increment(Fragment &fragment)cutlass::ZipTileIterator< First_, Second_ >inline
load_post_increment(Fragment &fragment, Coord< 4 > const &offset)cutlass::ZipTileIterator< First_, Second_ >inline
load_post_increment(Fragment &fragment, PredicateIterator pred_it)cutlass::ZipTileIterator< First_, Second_ >inline
operator++()cutlass::ZipTileIterator< First_, Second_ >inline
operator+=(int count)cutlass::ZipTileIterator< First_, Second_ >inline
operator+=(Coord< 3 > const &offset)cutlass::ZipTileIterator< First_, Second_ >inline
operator--()cutlass::ZipTileIterator< First_, Second_ >inline
operator-=(int count)cutlass::ZipTileIterator< First_, Second_ >inline
PredicateVector typedefcutlass::ZipTileIterator< First_, Second_ >
secondcutlass::ZipTileIterator< First_, Second_ >
Second typedefcutlass::ZipTileIterator< First_, Second_ >
store(Fragment const &fragment) constcutlass::ZipTileIterator< First_, Second_ >inline
store(Fragment const &fragment, Coord< 4 > const &offset) constcutlass::ZipTileIterator< First_, Second_ >inline
store(Fragment const &fragment, PredicateIterator pred_it) constcutlass::ZipTileIterator< First_, Second_ >inline
store_post_increment(Fragment const &fragment)cutlass::ZipTileIterator< First_, Second_ >inline
store_post_increment(Fragment const &fragment, Coord< 4 > const &offset)cutlass::ZipTileIterator< First_, Second_ >inline
store_post_increment(Fragment const &fragment, PredicateIterator pred_it)cutlass::ZipTileIterator< First_, Second_ >inline
TensorRef typedefcutlass::ZipTileIterator< First_, Second_ >
ZipTileIterator()cutlass::ZipTileIterator< First_, Second_ >inline
ZipTileIterator(Params const &_params, Coord< 3 > const &threadblock_offset=make_Coord(0, 0, 0))cutlass::ZipTileIterator< First_, Second_ >inline
ZipTileIterator(First const &_first, Second const &_second)cutlass::ZipTileIterator< First_, Second_ >inline
ZipTileIterator(TensorRef const &ref)cutlass::ZipTileIterator< First_, Second_ >inline
ZipTileIterator(Params const &_params, TensorRef const &ref)cutlass::ZipTileIterator< First_, Second_ >inline
+ + + + diff --git a/docs/classcutlass_1_1ZipTileIterator.html b/docs/classcutlass_1_1ZipTileIterator.html new file mode 100644 index 000000000..7cf7a392b --- /dev/null +++ b/docs/classcutlass_1_1ZipTileIterator.html @@ -0,0 +1,1290 @@ + + + + + + + +Cutlass: cutlass::ZipTileIterator< First_, Second_ > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::ZipTileIterator< First_, Second_ > Class Template Reference
+
+
+ +

Constructs an iterator from a pair of iterators. +

+ +

#include <zip_tile_iterator.h>

+ + + + + +

+Classes

struct  Params
 Params object. More...
 
+ + + + + + + + + + + + + + + + + + + +

+Public Types

typedef First_ First
 First iterator type. More...
 
typedef Second_ Second
 Second iterator type. More...
 
typedef ZipFragment< typename First::Fragment, typename Second::Fragment > Fragment
 Fragment type. More...
 
typedef First::PredicateVector PredicateVector
 Predicate vector. More...
 
typedef First::Index Index
 Index type. More...
 
typedef ZipTensorRef< typename First::TensorRef, typename Second::TensorRef > TensorRef
 Tensor reference. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_DEVICE ZipTileIterator ()
 Default constructor. More...
 
CUTLASS_DEVICE ZipTileIterator (Params const &_params, Coord< 3 > const &threadblock_offset=make_Coord(0, 0, 0))
 Constructs a zip iterator from params. More...
 
CUTLASS_DEVICE ZipTileIterator (First const &_first, Second const &_second)
 Constructs a zip iterator from iterator instances. More...
 
CUTLASS_DEVICE ZipTileIterator (TensorRef const &ref)
 Constructs a zip iterator from iterator instances. More...
 
CUTLASS_DEVICE ZipTileIterator (Params const &_params, TensorRef const &ref)
 Constructs a zip iterator from iterator instances. More...
 
template<typename PredicateIterator >
CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
 Initializes a predicate vector using a RegularTilePredicateFunctor. More...
 
template<typename PredicateIterator , typename PredicateFunctor >
CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, PredicateFunctor const &functor, Coord< 3 > const &block_offset)
 Initializes a predicate vector using an arbitrary predicate functor. More...
 
template<typename Fragment >
CUTLASS_DEVICE void load_post_increment (Fragment &fragment)
 Loads a fragment and increments without predicates. More...
 
template<typename Fragment >
CUTLASS_DEVICE void load_post_increment (Fragment &fragment, Coord< 4 > const &offset)
 Loads a fragment and increments without predicates. More...
 
template<typename Fragment >
CUTLASS_DEVICE void load (Fragment &fragment) const
 Loads a fragment without predicates. More...
 
template<typename Fragment >
CUTLASS_DEVICE void load (Fragment &fragment, Coord< 4 > const &offset) const
 Loads a fragment without predicates. More...
 
template<typename Fragment >
CUTLASS_DEVICE void store_post_increment (Fragment const &fragment)
 Stores a fragment and increments without predicates. More...
 
template<typename Fragment >
CUTLASS_DEVICE void store_post_increment (Fragment const &fragment, Coord< 4 > const &offset)
 Stores a fragment and increments without predicates. More...
 
template<typename Fragment >
CUTLASS_DEVICE void store (Fragment const &fragment) const
 Stores a fragment without predicates. More...
 
template<typename Fragment >
CUTLASS_DEVICE void store (Fragment const &fragment, Coord< 4 > const &offset) const
 Stores a fragment without predicates. More...
 
template<typename Fragment , typename PredicateIterator >
CUTLASS_DEVICE void load_post_increment (Fragment &fragment, PredicateIterator pred_it)
 Loads a fragment and increments, using predicates. More...
 
template<typename Fragment , typename PredicateIterator >
CUTLASS_DEVICE void load (Fragment &fragment, PredicateIterator pred_it) const
 Loads a fragment with predicates. More...
 
template<typename Fragment , typename PredicateIterator >
CUTLASS_DEVICE void store_post_increment (Fragment const &fragment, PredicateIterator pred_it)
 Loads a fragment and increments, using predicates. More...
 
template<typename Fragment , typename PredicateIterator >
CUTLASS_DEVICE void store (Fragment const &fragment, PredicateIterator pred_it) const
 Loads a fragment with predicates. More...
 
CUTLASS_DEVICE ZipTileIteratorincrement (int count=1)
 Increments store iterator to next tile. More...
 
CUTLASS_DEVICE ZipTileIteratoroperator++ ()
 Increments to next tile. More...
 
CUTLASS_DEVICE ZipTileIteratoroperator+= (int count)
 
CUTLASS_DEVICE ZipTileIteratoroperator+= (Coord< 3 > const &offset)
 Adds a vector offset to the underlying iterators. More...
 
CUTLASS_DEVICE ZipTileIteratordecrement (int count=1)
 Increments store iterator to previous tile. More...
 
CUTLASS_DEVICE ZipTileIteratoroperator-- ()
 Increments to subsequent tile. More...
 
CUTLASS_DEVICE ZipTileIteratoroperator-= (int count)
 Decrements to previous tile. More...
 
CUTLASS_DEVICE void add_pointer_offset (Index offset)
 Adds an offset to both iterators. More...
 
+ + + + + + + +

+Public Attributes

First first
 First iterator. More...
 
Second second
 Second iterator. More...
 
+

Member Typedef Documentation

+ +

◆ First

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
typedef First_ cutlass::ZipTileIterator< First_, Second_ >::First
+
+ +
+
+ +

◆ Fragment

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
typedef ZipFragment<typename First::Fragment, typename Second::Fragment> cutlass::ZipTileIterator< First_, Second_ >::Fragment
+
+ +
+
+ +

◆ Index

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
typedef First::Index cutlass::ZipTileIterator< First_, Second_ >::Index
+
+ +
+
+ +

◆ PredicateVector

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
typedef First::PredicateVector cutlass::ZipTileIterator< First_, Second_ >::PredicateVector
+
+ +
+
+ +

◆ Second

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
typedef Second_ cutlass::ZipTileIterator< First_, Second_ >::Second
+
+ +
+
+ +

◆ TensorRef

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
typedef ZipTensorRef< typename First::TensorRef, typename Second::TensorRef> cutlass::ZipTileIterator< First_, Second_ >::TensorRef
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ ZipTileIterator() [1/5]

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + +
CUTLASS_DEVICE cutlass::ZipTileIterator< First_, Second_ >::ZipTileIterator ()
+
+inline
+
+ +
+
+ +

◆ ZipTileIterator() [2/5]

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE cutlass::ZipTileIterator< First_, Second_ >::ZipTileIterator (Params const & _params,
Coord< 3 > const & threadblock_offset = make_Coord(0, 0, 0) 
)
+
+inline
+
+ +
+
+ +

◆ ZipTileIterator() [3/5]

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE cutlass::ZipTileIterator< First_, Second_ >::ZipTileIterator (First const & _first,
Second const & _second 
)
+
+inline
+
+ +
+
+ +

◆ ZipTileIterator() [4/5]

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE cutlass::ZipTileIterator< First_, Second_ >::ZipTileIterator (TensorRef const & ref)
+
+inline
+
+ +
+
+ +

◆ ZipTileIterator() [5/5]

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE cutlass::ZipTileIterator< First_, Second_ >::ZipTileIterator (Params const & _params,
TensorRef const & ref 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ add_pointer_offset()

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::add_pointer_offset (Index offset)
+
+inline
+
+ +
+
+ +

◆ decrement()

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE ZipTileIterator& cutlass::ZipTileIterator< First_, Second_ >::decrement (int count = 1)
+
+inline
+
+ +
+
+ +

◆ increment()

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE ZipTileIterator& cutlass::ZipTileIterator< First_, Second_ >::increment (int count = 1)
+
+inline
+
+ +
+
+ +

◆ initialize_predicates() [1/2]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename PredicateIterator >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::initialize_predicates (PredicateIterator predicate_it,
Coord< 3 > const & bounds,
Coord< 3 > const & block_offset = make_Coord(0,                                                                                           0,                                                                                           0) 
)
+
+inline
+
+ +
+
+ +

◆ initialize_predicates() [2/2]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename PredicateIterator , typename PredicateFunctor >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::initialize_predicates (PredicateIterator predicate_it,
PredicateFunctor const & functor,
Coord< 3 > const & block_offset 
)
+
+inline
+
+ +
+
+ +

◆ load() [1/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::load (Fragmentfragment) const
+
+inline
+
+ +
+
+ +

◆ load() [2/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::load (Fragmentfragment,
Coord< 4 > const & offset 
) const
+
+inline
+
+ +
+
+ +

◆ load() [3/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment , typename PredicateIterator >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::load (Fragmentfragment,
PredicateIterator pred_it 
) const
+
+inline
+
+ +
+
+ +

◆ load_post_increment() [1/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::load_post_increment (Fragmentfragment)
+
+inline
+
+ +
+
+ +

◆ load_post_increment() [2/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::load_post_increment (Fragmentfragment,
Coord< 4 > const & offset 
)
+
+inline
+
+ +
+
+ +

◆ load_post_increment() [3/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment , typename PredicateIterator >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::load_post_increment (Fragmentfragment,
PredicateIterator pred_it 
)
+
+inline
+
+ +
+
+ +

◆ operator++()

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + +
CUTLASS_DEVICE ZipTileIterator& cutlass::ZipTileIterator< First_, Second_ >::operator++ ()
+
+inline
+
+ +
+
+ +

◆ operator+=() [1/2]

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE ZipTileIterator& cutlass::ZipTileIterator< First_, Second_ >::operator+= (int count)
+
+inline
+
+ +
+
+ +

◆ operator+=() [2/2]

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE ZipTileIterator& cutlass::ZipTileIterator< First_, Second_ >::operator+= (Coord< 3 > const & offset)
+
+inline
+
+ +
+
+ +

◆ operator--()

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + +
CUTLASS_DEVICE ZipTileIterator& cutlass::ZipTileIterator< First_, Second_ >::operator-- ()
+
+inline
+
+ +
+
+ +

◆ operator-=()

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE ZipTileIterator& cutlass::ZipTileIterator< First_, Second_ >::operator-= (int count)
+
+inline
+
+ +
+
+ +

◆ store() [1/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::store (Fragment const & fragment) const
+
+inline
+
+ +
+
+ +

◆ store() [2/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::store (Fragment const & fragment,
Coord< 4 > const & offset 
) const
+
+inline
+
+ +
+
+ +

◆ store() [3/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment , typename PredicateIterator >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::store (Fragment const & fragment,
PredicateIterator pred_it 
) const
+
+inline
+
+ +
+
+ +

◆ store_post_increment() [1/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::store_post_increment (Fragment const & fragment)
+
+inline
+
+ +
+
+ +

◆ store_post_increment() [2/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::store_post_increment (Fragment const & fragment,
Coord< 4 > const & offset 
)
+
+inline
+
+ +
+
+ +

◆ store_post_increment() [3/3]

+ +
+
+
+template<typename First_ , typename Second_ >
+
+template<typename Fragment , typename PredicateIterator >
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_DEVICE void cutlass::ZipTileIterator< First_, Second_ >::store_post_increment (Fragment const & fragment,
PredicateIterator pred_it 
)
+
+inline
+
+ +
+
+

Member Data Documentation

+ +

◆ first

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
First cutlass::ZipTileIterator< First_, Second_ >::first
+
+ +
+
+ +

◆ second

+ +
+
+
+template<typename First_ , typename Second_ >
+ + + + +
Second cutlass::ZipTileIterator< First_, Second_ >::second
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/classcutlass_1_1detail_1_1ScalarOrPointer-members.html b/docs/classcutlass_1_1detail_1_1ScalarOrPointer-members.html new file mode 100644 index 000000000..8da714665 --- /dev/null +++ b/docs/classcutlass_1_1detail_1_1ScalarOrPointer-members.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::detail::ScalarOrPointer< Scalar_ > Member List
+
+ + + + + diff --git a/docs/classcutlass_1_1detail_1_1ScalarOrPointer.html b/docs/classcutlass_1_1detail_1_1ScalarOrPointer.html new file mode 100644 index 000000000..6a28c38f8 --- /dev/null +++ b/docs/classcutlass_1_1detail_1_1ScalarOrPointer.html @@ -0,0 +1,434 @@ + + + + + + + +Cutlass: cutlass::detail::ScalarOrPointer< Scalar_ > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::detail::ScalarOrPointer< Scalar_ > Class Template Reference
+
+
+ +

#include <scalar_or_pointer.h>

+ + + + + +

+Public Types

typedef Scalar_ Scalar
 Underlying scalar type. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE ScalarOrPointer ()
 Default ctor. More...
 
CUTLASS_HOST_DEVICE ScalarOrPointer (Scalar const &val)
 Object behaves as a scalar. More...
 
CUTLASS_HOST_DEVICE ScalarOrPointer (Scalar const *ptr_)
 Object behaves as a scalar. More...
 
CUTLASS_HOST_DEVICE bool is_pointer () const
 Returns true if is pointer. More...
 
CUTLASS_HOST_DEVICE Scalar const * get_ptr () const
 Gets the pointer value. More...
 
CUTLASS_HOST_DEVICE Scalar get_scalar () const
 Gets the pointer value. More...
 
CUTLASS_HOST_DEVICE ScalarOrPointeroperator= (Scalar const &scalar_)
 Assigns to a scalar and sets pointer to nullptr. More...
 
CUTLASS_HOST_DEVICE ScalarOrPointeroperator= (Scalar const *ptr_)
 Assigns to a pointer value. More...
 
CUTLASS_HOST_DEVICE Scalar get () const
 Access the element. More...
 
CUTLASS_HOST_DEVICE operator Scalar () const
 Accesses the element. More...
 
+

Detailed Description

+

template<typename Scalar_>
+class cutlass::detail::ScalarOrPointer< Scalar_ >

+ +

Helper class defines an object which operates as either a scalar or a pointer. If the pointer is non-null, it is dereferenced when the object is accessed.

+

Member Typedef Documentation

+ +

◆ Scalar

+ +
+
+
+template<typename Scalar_>
+ + + + +
typedef Scalar_ cutlass::detail::ScalarOrPointer< Scalar_ >::Scalar
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ ScalarOrPointer() [1/3]

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::detail::ScalarOrPointer< Scalar_ >::ScalarOrPointer ()
+
+inline
+
+ +
+
+ +

◆ ScalarOrPointer() [2/3]

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::detail::ScalarOrPointer< Scalar_ >::ScalarOrPointer (Scalar const & val)
+
+inline
+
+ +
+
+ +

◆ ScalarOrPointer() [3/3]

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::detail::ScalarOrPointer< Scalar_ >::ScalarOrPointer (Scalar const * ptr_)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ get()

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Scalar cutlass::detail::ScalarOrPointer< Scalar_ >::get () const
+
+inline
+
+ +
+
+ +

◆ get_ptr()

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Scalar const* cutlass::detail::ScalarOrPointer< Scalar_ >::get_ptr () const
+
+inline
+
+ +
+
+ +

◆ get_scalar()

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Scalar cutlass::detail::ScalarOrPointer< Scalar_ >::get_scalar () const
+
+inline
+
+ +
+
+ +

◆ is_pointer()

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::detail::ScalarOrPointer< Scalar_ >::is_pointer () const
+
+inline
+
+ +
+
+ +

◆ operator Scalar()

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::detail::ScalarOrPointer< Scalar_ >::operator Scalar () const
+
+inline
+
+ +
+
+ +

◆ operator=() [1/2]

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ScalarOrPointer& cutlass::detail::ScalarOrPointer< Scalar_ >::operator= (Scalar const & scalar_)
+
+inline
+
+ +
+
+ +

◆ operator=() [2/2]

+ +
+
+
+template<typename Scalar_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ScalarOrPointer& cutlass::detail::ScalarOrPointer< Scalar_ >::operator= (Scalar const * ptr_)
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params-members.html b/docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params-members.html new file mode 100644 index 000000000..323b1406c --- /dev/null +++ b/docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params Member List
+
+ + + + + diff --git a/docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params.html b/docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params.html new file mode 100644 index 000000000..5fc5d05e3 --- /dev/null +++ b/docs/classcutlass_1_1gemm_1_1LinearScalingDevicePtr_1_1Params.html @@ -0,0 +1,389 @@ + + + + + + + +Cutlass: cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params Class Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params Class Reference
+
+
+ +

The parameters. +

+ +

#include <linear_scaling_device_ptr.h>

+ + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE Params ()
 
CUTLASS_HOST_DEVICE Params (Scalar alpha, Scalar beta)
 
CUTLASS_HOST_DEVICE Params (Scalar const *alpha_ptr, Scalar const *beta_ptr)
 
CUTLASS_HOST_DEVICE int initialize (Scalar alpha, Scalar beta)
 Initialize the parameters. More...
 
CUTLASS_HOST_DEVICE int initialize (Scalar const *alpha, Scalar const *beta)
 Initialize the parameters. More...
 
template<typename GemmDesc_ >
CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc)
 Initialize the parameters. More...
 
CUTLASS_HOST_DEVICE Scalar alpha () const
 Gets the alpha scalar. More...
 
CUTLASS_HOST_DEVICE Scalar beta () const
 Gets the beta scalar. More...
 
+

Constructor & Destructor Documentation

+ +

◆ Params() [1/3]

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::Params ()
+
+inline
+
+ +
+
+ +

◆ Params() [2/3]

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::Params (Scalar alpha,
Scalar beta 
)
+
+inline
+
+ +
+
+ +

◆ Params() [3/3]

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::Params (Scalar const * alpha_ptr,
Scalar const * beta_ptr 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ alpha()

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Scalar cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::alpha () const
+
+inline
+
+ +
+
+ +

◆ beta()

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Scalar cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::beta () const
+
+inline
+
+ +
+
+ +

◆ initialize() [1/3]

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE int cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::initialize (Scalar alpha,
Scalar beta 
)
+
+inline
+
+ +
+
+ +

◆ initialize() [2/3]

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE int cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::initialize (Scalar const * alpha,
Scalar const * beta 
)
+
+inline
+
+ +
+
+ +

◆ initialize() [3/3]

+ +
+
+
+template<typename Scalar_ , typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_>>
+
+template<typename GemmDesc_ >
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE int cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params::initialize (GemmDesc_ const & desc)
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/classcutlass_1_1platform_1_1complex-members.html b/docs/classcutlass_1_1platform_1_1complex-members.html new file mode 100644 index 000000000..3e19742e0 --- /dev/null +++ b/docs/classcutlass_1_1platform_1_1complex-members.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::platform::complex< T > Member List
+
+ + + + + diff --git a/docs/classcutlass_1_1platform_1_1complex.html b/docs/classcutlass_1_1platform_1_1complex.html new file mode 100644 index 000000000..672fef7e9 --- /dev/null +++ b/docs/classcutlass_1_1platform_1_1complex.html @@ -0,0 +1,413 @@ + + + + + + + +Cutlass: cutlass::platform::complex< T > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::platform::complex< T > Class Template Reference
+
+
+ +

#include <complex.h>

+ + + + + +

+Public Types

typedef T value_type
 Type alias for scalar type. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE complex (T r=T(0), T i=T(0))
 Constructor. More...
 
CUTLASS_HOST_DEVICE complex (cuFloatComplex const &z)
 Conversion from cuFloatComplex. More...
 
CUTLASS_HOST_DEVICE complex (cuDoubleComplex const &z)
 Conversion from cuDoubleComplex. More...
 
CUTLASS_HOST_DEVICE T const & real () const
 Accesses the real part of the complex number. More...
 
CUTLASS_HOST_DEVICE T & real ()
 Accesses the real part of the complex number. More...
 
CUTLASS_HOST_DEVICE T const & imag () const
 Accesses the imaginary part of the complex number. More...
 
CUTLASS_HOST_DEVICE T & imag ()
 Accesses the imaginary part of the complex number. More...
 
CUTLASS_HOST_DEVICE operator cuFloatComplex () const
 Converts to cuFloatComplex. More...
 
CUTLASS_HOST_DEVICE operator cuDoubleComplex () const
 Converts to cuDoubleComplex. More...
 
+

Detailed Description

+

template<typename T>
+class cutlass::platform::complex< T >

+ +

Class for representing and manipulating complex numbers with conversions from built-in CUDA complex types.

+

Member Typedef Documentation

+ +

◆ value_type

+ +
+
+
+template<typename T>
+ + + + +
typedef T cutlass::platform::complex< T >::value_type
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ complex() [1/3]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::platform::complex< T >::complex (r = T(0),
i = T(0) 
)
+
+inline
+
+ +
+
+ +

◆ complex() [2/3]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::platform::complex< T >::complex (cuFloatComplex const & z)
+
+inline
+
+ +
+
+ +

◆ complex() [3/3]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::platform::complex< T >::complex (cuDoubleComplex const & z)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ imag() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE T const& cutlass::platform::complex< T >::imag () const
+
+inline
+
+ +
+
+ +

◆ imag() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE T& cutlass::platform::complex< T >::imag ()
+
+inline
+
+ +
+
+ +

◆ operator cuDoubleComplex()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::platform::complex< T >::operator cuDoubleComplex () const
+
+inline
+
+ +
+
+ +

◆ operator cuFloatComplex()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::platform::complex< T >::operator cuFloatComplex () const
+
+inline
+
+ +
+
+ +

◆ real() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE T const& cutlass::platform::complex< T >::real () const
+
+inline
+
+ +
+
+ +

◆ real() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE T& cutlass::platform::complex< T >::real ()
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/classcutlass_1_1platform_1_1unique__ptr-members.html b/docs/classcutlass_1_1platform_1_1unique__ptr-members.html index 696f47884..1242de683 100644 --- a/docs/classcutlass_1_1platform_1_1unique__ptr-members.html +++ b/docs/classcutlass_1_1platform_1_1unique__ptr-members.html @@ -98,7 +98,7 @@ $(function() {
diff --git a/docs/classcutlass_1_1platform_1_1unique__ptr.html b/docs/classcutlass_1_1platform_1_1unique__ptr.html index cf455f2e5..625e790b8 100644 --- a/docs/classcutlass_1_1platform_1_1unique__ptr.html +++ b/docs/classcutlass_1_1platform_1_1unique__ptr.html @@ -546,7 +546,7 @@ template<class T, class Deleter = default_delete<T>>
diff --git a/docs/classes.html b/docs/classes.html index 9896653f6..6a517312c 100644 --- a/docs/classes.html +++ b/docs/classes.html @@ -72,100 +72,116 @@ $(function() {
Class Index
-
a | b | c | d | e | f | g | h | i | l | m | n | p | r | s | t | u | v | w
+
a | b | c | d | e | f | g | h | i | k | l | m | n | p | r | s | t | u | v | w | z
- - - - - - - - - - - - - - - - + + + + + + + + + - - - + + + + + + + + + + - - + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + - - - - + + + + + + + - - - - - - + + + + + + + - - - - - - - + + + + + + + + + + + + +
  a  
-
FragmentMultiplyAdd (cutlass::gemm)   IgemmEpilogueScalar (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 8 > (cutlass)   GlobalLoadStreamBase::SharedStorage (cutlass::gemm)   
FragmentMultiplyAdd< half > (cutlass::gemm)   IgemmEpilogueScalar< int > (cutlass::gemm)   log2_down (cutlass)   SimplifiedGemmEpilogueTraits (cutlass::gemm)   
aligned_chunk (cutlass::platform)   FragmentStore (cutlass)   IgemmEpilogueTraits (cutlass::gemm)   log2_down< N, 1, Count > (cutlass)   SimplifiedGemmTraits (cutlass::gemm)   
aligned_storage (cutlass::platform)   FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogueTraitsHelper (cutlass::gemm)   log2_up (cutlass)   SimplifiedGemmTraitsHelper (cutlass::gemm)   
AlignedStruct (cutlass)   FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmFloatToInt8Converter (cutlass::gemm)   log2_up< N, 1, Count > (cutlass)   sqrt_est (cutlass)   
alignment_of (cutlass::platform)   
  g  
-
IgemmGlobalLoadTransformer (cutlass::gemm)   
  m  
-
StorageType (cutlass)   
alignment_of< const value_t > (cutlass::platform)   IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > (cutlass::gemm)   StorageType< 1 > (cutlass)   
alignment_of< const volatile value_t > (cutlass::platform)   Gemm (cutlass::gemm)   IgemmGlobalStoreTransformer (cutlass::gemm)   GemmTraits::MainLoopSharedStorage (cutlass::gemm)   StorageType< 2 > (cutlass)   
alignment_of< double2 > (cutlass::platform)   GemmConfig (cutlass::gemm)   IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > (cutlass::gemm)   MatrixLayout (cutlass)   StorageType< 4 > (cutlass)   
alignment_of< double4 > (cutlass::platform)   GemmDesc (cutlass::gemm)   IgemmInt8ToFloatConverter (cutlass::gemm)   MemorySpace (cutlass)   Store (cutlass)   
alignment_of< float4 > (cutlass::platform)   GemmEpilogue (cutlass::gemm)   IgemmSharedStoreTransformer (cutlass::gemm)   
  n  
-
Store< double, 2, Memory_, true, 16 > (cutlass)   
alignment_of< int4 > (cutlass::platform)   GemmEpilogueTraits (cutlass::gemm)   IgemmSwizzle (cutlass::gemm)   Store< Scalar_, Lanes_, Memory_, true, 16 > (cutlass)   
alignment_of< long4 > (cutlass::platform)   GemmEpilogueTraitsHelper (cutlass::gemm)   IgemmTileTraitsHelperA (cutlass::gemm)   nullptr_t (cutlass::platform)   Store< Scalar_, Lanes_, Memory_, true, 4 > (cutlass)   
alignment_of< longlong2 > (cutlass::platform)   GemmGlobalIteratorAb (cutlass::gemm)   IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   
  p  
-
Store< Scalar_, Lanes_, Memory_, true, 8 > (cutlass)   
alignment_of< longlong4 > (cutlass::platform)   GemmGlobalIteratorCd (cutlass::gemm)   IgemmTileTraitsHelperB (cutlass::gemm)   GemmTraits::StreamSharedStorage (cutlass::gemm)   
alignment_of< uint4 > (cutlass::platform)   GemmGlobalTileCdTraits (cutlass::gemm)   IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   alignment_of::pad (cutlass::platform)   GemmEpilogueTraits::StreamSharedStorage (cutlass::gemm)   
alignment_of< ulong4 > (cutlass::platform)   GemmGlobalTileTraits (cutlass::gemm)   IgemmTraits (cutlass::gemm)   WmmaGemmGlobalIteratorCd::Params (cutlass::gemm)   
  t  
+
GemmConfig (cutlass::gemm)   IgemmTraitsHelper (cutlass::gemm)   LinearScalingDevicePtr::Params (cutlass::gemm)   Store< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 > (cutlass)   
GemmCoord (cutlass::gemm)   IgemmTransformerA (cutlass::gemm)   GlobalLoadStream::Params (cutlass::gemm)   Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 > (cutlass)   
aligned_chunk (cutlass::platform)   GemmDesc (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   SharedStreamPair::Params (cutlass::gemm)   Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 > (cutlass)   
aligned_storage (cutlass::platform)   GemmEpilogue (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   WmmaGemmGlobalIteratorCd::Params (cutlass::gemm)   Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 > (cutlass)   
AlignedStruct (cutlass)   GemmEpilogueTraits (cutlass::gemm)   IgemmTransformerB (cutlass::gemm)   ZipTileIterator::Params (cutlass)   Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 > (cutlass)   
alignment_of (cutlass::platform)   GemmEpilogueTraitsHelper (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   GemmTraits::Params (cutlass::gemm)   Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > (cutlass)   
alignment_of< const value_t > (cutlass::platform)   GemmGlobalIteratorAb (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   LinearScaling::Params (cutlass::gemm)   GemmEpilogueTraits::StreamSharedStorage (cutlass::gemm)   
alignment_of< const volatile value_t > (cutlass::platform)   GemmGlobalIteratorCd (cutlass::gemm)   int4_t (cutlass)   GemmGlobalIteratorAb::Params (cutlass::gemm)   TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::StrideVector (cutlass)   
alignment_of< double2 > (cutlass::platform)   GemmGlobalTileCdTraits (cutlass::gemm)   integral_constant (cutlass::platform)   GlobalLoadStreamPair::Params (cutlass::gemm)   swizzleDirection (cutlass::gemm)   
alignment_of< double4 > (cutlass::platform)   GemmGlobalTileTraits (cutlass::gemm)   is_arithmetic (cutlass::platform)   GemmGlobalIteratorCd::Params (cutlass::gemm)   
  t  
alignment_of< ulonglong2 > (cutlass::platform)   GemmMultiplicandTraits (cutlass::gemm)   IgemmTraitsHelper (cutlass::gemm)   GemmTraits::Params (cutlass::gemm)   
alignment_of< ulonglong4 > (cutlass::platform)   GemmOperand (cutlass)   IgemmTransformerA (cutlass::gemm)   GlobalLoadStreamBase::Params (cutlass::gemm)   TensorRef (cutlass)   
alignment_of< volatile value_t > (cutlass::platform)   GemmOperandTraitsAb (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   TileIteratorBase::Params (cutlass)   TensorView (cutlass)   
alignment_of< float4 > (cutlass::platform)   GemmMultiplicandTraits (cutlass::gemm)   is_base_of (cutlass::platform)   GemmEpilogueTraits::Params (cutlass::gemm)   
alignment_of< int4 > (cutlass::platform)   GemmOperand (cutlass)   is_base_of_helper (cutlass::platform)   TileIteratorBase::Params (cutlass)   TensorRef (cutlass)   
alignment_of< long4 > (cutlass::platform)   GemmOperandTraitsAb (cutlass::gemm)   is_floating_point (cutlass::platform)   TileLoadIterator::Params (cutlass)   TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ > (cutlass)   
alignment_of< longlong2 > (cutlass::platform)   GemmSharedLoadTileATraits (cutlass::gemm)   is_fundamental (cutlass::platform)   TileStoreIterator::Params (cutlass)   TensorRefArray (cutlass)   
alignment_of< longlong4 > (cutlass::platform)   GemmSharedLoadTileBTraits (cutlass::gemm)   is_integral (cutlass::platform)   TileLoadStream::Params (cutlass)   TensorRefBatchStrided (cutlass)   
alignment_of< uint4 > (cutlass::platform)   GemmSharedLoadTileDTraits (cutlass::gemm)   is_integral< char > (cutlass::platform)   TileStoreStream::Params (cutlass)   TensorView (cutlass)   
alignment_of< ulong4 > (cutlass::platform)   GemmSharedStoreTileAbTraits (cutlass::gemm)   is_integral< const T > (cutlass::platform)   SharedLoadStream::Params (cutlass::gemm)   ThreadMultiplyAdd (cutlass::gemm)   
alignment_of< ulonglong2 > (cutlass::platform)   GemmSharedStoreTileDTraits (cutlass::gemm)   is_integral< const volatile T > (cutlass::platform)   plus (cutlass::platform)   ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float > (cutlass::gemm)   
alignment_of< ulonglong4 > (cutlass::platform)   GemmSharedStoreWithSkewTileAbTraits (cutlass::gemm)   is_integral< int > (cutlass::platform)   PredicatedTileLoadStream (cutlass)   ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, half > (cutlass::gemm)   
alignment_of< volatile value_t > (cutlass::platform)   GemmTileTraitsHelperA (cutlass::gemm)   is_integral< long > (cutlass::platform)   PredicatedTileStoreStream (cutlass)   ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int > (cutlass::gemm)   
  b  
-
GemmSharedLoadTileATraits (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   GemmGlobalIteratorCd::Params (cutlass::gemm)   ThreadMultiplyAdd (cutlass::gemm)   
GemmSharedLoadTileBTraits (cutlass::gemm)   IgemmTransformerB (cutlass::gemm)   TileLoadIterator::Params (cutlass)   ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > (cutlass::gemm)   
bool_constant (cutlass::platform)   GemmSharedLoadTileDTraits (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   TileStoreIterator::Params (cutlass)   ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > (cutlass::gemm)   
GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_integral< long long > (cutlass::platform)   PredicateTileAdapter (cutlass)   GemmSharedStoreTileAbTraits::ThreadOffset (cutlass::gemm)   
GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_integral< short > (cutlass::platform)   TileLoadStream::PredicateVector (cutlass)   WmmaGemmGlobalIteratorCdTraits::ThreadOffset (cutlass::gemm)   
bin1_t (cutlass)   GemmTileTraitsHelperB (cutlass::gemm)   is_integral< signed char > (cutlass::platform)   PredicateVector (cutlass)   GemmGlobalTileCdTraits::ThreadOffset (cutlass::gemm)   
bool_constant (cutlass::platform)   GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_integral< unsigned char > (cutlass::platform)   TileStoreStream::PredicateVector (cutlass)   GemmSharedLoadTileATraits::ThreadOffset (cutlass::gemm)   
  c  
-
GemmSharedStoreTileAbTraits (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   GemmEpilogueTraits::Params (cutlass::gemm)   GemmSharedLoadTileBTraits::ThreadOffset (cutlass::gemm)   
GemmSharedStoreTileDTraits (cutlass::gemm)   integral_constant (cutlass::platform)   Gemm::Params (cutlass::gemm)   GemmGlobalTileCdTraits::ThreadOffset (cutlass::gemm)   
ClearAccumulators (cutlass::gemm)   GemmSharedStoreWithSkewTileAbTraits (cutlass::gemm)   is_arithmetic (cutlass::platform)   SharedLoadStream::Params (cutlass::gemm)   IgemmContiguousGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape (cutlass)   GemmTileTraitsHelperA (cutlass::gemm)   is_base_of (cutlass::platform)   LinearScaling::Params (cutlass::gemm)   GemmGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > (cutlass)   GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_base_of_helper (cutlass::platform)   GemmGlobalIteratorAb::Params (cutlass::gemm)   GemmSharedLoadTileDTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > (cutlass)   GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_floating_point (cutlass::platform)   plus (cutlass::platform)   GemmSharedLoadTileATraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides (cutlass)   GemmTileTraitsHelperB (cutlass::gemm)   is_fundamental (cutlass::platform)   PredicateTileAdapter (cutlass)   GemmSharedStoreTileDTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > (cutlass)   GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_integral (cutlass::platform)   PredicateVector (cutlass)   HgemmCrosswiseGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > (cutlass)   GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_integral< char > (cutlass::platform)   ProjectOperand (cutlass::gemm)   GemmSharedStoreTileAbTraits::ThreadOffset (cutlass::gemm)   
ComputeThreadOffsetFromStrides (cutlass)   GemmTraits (cutlass::gemm)   is_integral< const T > (cutlass::platform)   ProjectOperand< GemmOperand::kA, Kstrided > (cutlass::gemm)   TileTraitsWarpRake::ThreadOffset (cutlass)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > (cutlass)   GetExtent (cutlass::gemm)   is_integral< const volatile T > (cutlass::platform)   ProjectOperand< GemmOperand::kB, Kstrided > (cutlass::gemm)   GemmSharedStoreWithSkewTileAbTraits::ThreadOffset (cutlass::gemm)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > (cutlass)   GetExtent< GemmOperand::kA, Tile_ > (cutlass::gemm)   is_integral< int > (cutlass::platform)   ProjectOperand< GemmOperand::kC, true > (cutlass::gemm)   WmmaGemmGlobalIteratorCdTraits::ThreadOffset (cutlass::gemm)   
conditional (cutlass::platform)   GetExtent< GemmOperand::kB, Tile_ > (cutlass::gemm)   is_integral< long > (cutlass::platform)   ProjectOperand< GemmOperand::kD, true > (cutlass::gemm)   TiledThreadOffset (cutlass)   
conditional< false, T, F > (cutlass::platform)   GemmTraits::GlobalLoadStream (cutlass::gemm)   is_integral< long long > (cutlass::platform)   
  r  
-
TileIteratorBase (cutlass)   
PredicateVector::ConstIterator (cutlass)   GlobalLoadStream (cutlass::gemm)   is_integral< short > (cutlass::platform)   TileLoadIterator (cutlass)   
ConstPredicateTileAdapter (cutlass)   GlobalLoadStreamBase (cutlass::gemm)   is_integral< signed char > (cutlass::platform)   remove_const (cutlass::platform)   TileStoreIterator (cutlass)   
Convert (cutlass)   greater (cutlass::platform)   is_integral< unsigned char > (cutlass::platform)   remove_const< const T > (cutlass::platform)   TileTraits (cutlass)   
Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > (cutlass)   
  h  
-
is_integral< unsigned int > (cutlass::platform)   remove_cv (cutlass::platform)   TileTraitsContiguousMajor (cutlass)   
Coord (cutlass)   is_integral< unsigned long > (cutlass::platform)   remove_volatile (cutlass::platform)   TileTraitsStandard (cutlass)   
Copy (cutlass)   HgemmConfig (cutlass::gemm)   is_integral< unsigned long long > (cutlass::platform)   remove_volatile< volatile T > (cutlass::platform)   TileTraitsStrideMajor (cutlass)   
GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_integral< unsigned int > (cutlass::platform)   ProjectOperand (cutlass::gemm)   GemmSharedStoreWithSkewTileAbTraits::ThreadOffset (cutlass::gemm)   
GemmTraits (cutlass::gemm)   is_integral< unsigned long > (cutlass::platform)   ProjectOperand< GemmOperand::kA, Kstrided > (cutlass::gemm)   IgemmGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ClearAccumulators (cutlass::gemm)   GetExtent (cutlass::gemm)   is_integral< unsigned long long > (cutlass::platform)   ProjectOperand< GemmOperand::kB, Kstrided > (cutlass::gemm)   GemmSharedLoadTileBTraits::ThreadOffset (cutlass::gemm)   
MatrixLayout::ColumnMajor (cutlass)   GetExtent< GemmOperand::kA, Tile_ > (cutlass::gemm)   is_integral< unsigned short > (cutlass::platform)   ProjectOperand< GemmOperand::kC, true > (cutlass::gemm)   GemmGlobalTileTraits::ThreadOffset (cutlass::gemm)   
MatrixLayout::ColumnMajorBlockLinear (cutlass)   GetExtent< GemmOperand::kB, Tile_ > (cutlass::gemm)   is_integral< volatile T > (cutlass::platform)   ProjectOperand< GemmOperand::kD, true > (cutlass::gemm)   GemmSharedLoadTileDTraits::ThreadOffset (cutlass::gemm)   
ColumnMajorBlockSwizzle (cutlass::gemm)   GlobalLoadStream (cutlass::gemm)   is_pointer (cutlass::platform)   
  r  
+
TileTraitsWarpRake::ThreadOffset (cutlass)   
MatrixLayout::ColumnMajorInterleaved (cutlass)   GlobalLoadStreamPair (cutlass::gemm)   is_pointer_helper (cutlass::platform)   GemmSharedStoreTileDTraits::ThreadOffset (cutlass::gemm)   
complex (cutlass::platform)   greater (cutlass::platform)   is_pointer_helper< T * > (cutlass::platform)   RegularTilePredicateFunctor (cutlass)   HgemmCrosswiseGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape (cutlass)   
  h  
+
is_pow2 (cutlass)   remove_const (cutlass::platform)   TileAllocation (cutlass)   
ComputeOffsetFromStrides (cutlass)   is_same (cutlass::platform)   remove_const< const T > (cutlass::platform)   TileCoord (cutlass)   
ComputeThreadOffsetFromStrides (cutlass)   HgemmConfig (cutlass::gemm)   is_same< A, A > (cutlass::platform)   remove_cv (cutlass::platform)   TiledThreadOffset (cutlass)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > (cutlass)   HgemmCrosswiseGlobalTileTraits (cutlass::gemm)   is_trivially_copyable (cutlass::platform)   remove_volatile (cutlass::platform)   TileIteratorBase (cutlass)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > (cutlass)   HgemmSwizzle (cutlass::gemm)   is_void (cutlass::platform)   remove_volatile< volatile T > (cutlass::platform)   TileLoadIterator (cutlass)   
conditional (cutlass::platform)   HgemmTileTraitsHelperA (cutlass::gemm)   is_volatile (cutlass::platform)   ReshapeThreads (cutlass::gemm)   TileLoadStream (cutlass)   
conditional< false, T, F > (cutlass::platform)   HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_volatile< volatile T > (cutlass::platform)   ReshapeThreads< Tile_, Threads_, true > (cutlass::gemm)   TileStoreIterator (cutlass)   
PredicateVector::ConstIterator (cutlass)   HgemmTileTraitsHelperB (cutlass::gemm)   PredicateVector::Iterator (cutlass)   ReshapeTile (cutlass)   TileStoreStream (cutlass)   
TensorRefBatchStrided::ConstIterator (cutlass)   HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   IteratorAdvance (cutlass)   ReshapeTile< Tile_, kAccessSize_, true > (cutlass)   TileTraits (cutlass)   
TensorRefArray::ConstIterator (cutlass)   HgemmTraits (cutlass::gemm)   
  k  
+
MatrixLayout::RowMajor (cutlass)   TileTraitsContiguousMajor (cutlass)   
ConstPredicateTileAdapter (cutlass)   HgemmTraitsHelper (cutlass::gemm)   MatrixLayout::RowMajorBlockLinear (cutlass)   TileTraitsStandard (cutlass)   
MatrixLayout::ContiguousLayout (cutlass)   HgemmTransformerA (cutlass::gemm)   KernelLaunchConfiguration (cutlass)   RowMajorBlockSwizzle (cutlass::gemm)   TileTraitsStrideMajor (cutlass)   
Convert (cutlass)   HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   
  l  
+
MatrixLayout::RowMajorInterleaved (cutlass)   TileTraitsWarpRake (cutlass)   
Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > (cutlass)   HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   
  s  
+
PredicateVector::TrivialIterator (cutlass)   
Coord (cutlass)   HgemmTransformerB (cutlass::gemm)   Launch (cutlass::gemm)   TrivialPredicateTileAdapter (cutlass)   
Copy (cutlass)   HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   Launch< Gemm, false > (cutlass::gemm)   ScalarIO (cutlass)   
  u  
+
  d  
-
HgemmCrosswiseGlobalTileTraits (cutlass::gemm)   is_integral< unsigned short > (cutlass::platform)   ReshapeThreads (cutlass::gemm)   TileTraitsWarpRake (cutlass)   
HgemmSwizzle (cutlass::gemm)   is_integral< volatile T > (cutlass::platform)   ReshapeThreads< Tile_, Threads_, true > (cutlass::gemm)   PredicateVector::TrivialIterator (cutlass)   
default_delete (cutlass::platform)   HgemmTileTraitsHelperA (cutlass::gemm)   is_pointer (cutlass::platform)   ReshapeTile (cutlass)   TrivialPredicateTileAdapter (cutlass)   
default_delete< T[]> (cutlass::platform)   HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_pointer_helper (cutlass::platform)   ReshapeTile< Tile_, kAccessSize_, true > (cutlass)   
  u  
+
HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   less (cutlass::platform)   ScalarOrPointer (cutlass::detail)   
  i  
+
LinearScaling (cutlass::gemm)   SgemmConfig (cutlass::gemm)   uint4_t (cutlass)   
DebugType   LinearScalingDevicePtr (cutlass::gemm)   SgemmLBTraits (cutlass::gemm)   unique_ptr (cutlass::platform)   
DebugValue   Identity (cutlass)   Load (cutlass)   SgemmTraits (cutlass::gemm)   
  v  
DgemmConfig (cutlass::gemm)   HgemmTileTraitsHelperB (cutlass::gemm)   is_pointer_helper< T * > (cutlass::platform)   
  s  
-
DgemmTraits (cutlass::gemm)   HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_pow2 (cutlass)   unique_ptr (cutlass::platform)   
divide_assert (cutlass)   HgemmTraits (cutlass::gemm)   is_same (cutlass::platform)   SgemmConfig (cutlass::gemm)   
  v  
-
is_base_of_helper::dummy (cutlass::platform)   HgemmTraitsHelper (cutlass::gemm)   is_same< A, A > (cutlass::platform)   SgemmTraits (cutlass::gemm)   
default_delete (cutlass::platform)   IdentityBlockSwizzle (cutlass::gemm)   Load< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 > (cutlass)   Shape (cutlass)   
default_delete< T[]> (cutlass::platform)   IdentityTensorMapFunc (cutlass)   Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 > (cutlass)   ShapeAdd (cutlass)   Vector (cutlass)   
DgemmConfig (cutlass::gemm)   IgemmConfig (cutlass::gemm)   Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 > (cutlass)   ShapeCount (cutlass)   Vector< bin1_t, kLanes_ > (cutlass)   
DgemmTraits (cutlass::gemm)   IgemmConfig< OutputTile_, int8_t, ThreadGemmShape_ > (cutlass::gemm)   Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 > (cutlass)   ShapeDiv (cutlass)   Vector< half, 1 > (cutlass)   
divide_assert (cutlass)   IgemmEpilogue (cutlass::gemm)   Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 > (cutlass)   ShapeDivCeiling (cutlass)   Vector< half, kLanes_ > (cutlass)   
is_base_of_helper::dummy (cutlass::platform)   IgemmEpilogue< GemmEpilogueTraits_, true > (cutlass::gemm)   Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > (cutlass)   ShapeMax (cutlass)   Vector< int4_t, kLanes_ > (cutlass)   
DumpType (cutlass)   IgemmEpilogueScalar (cutlass::gemm)   Load< Vector< bin1_t, 32 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > (cutlass)   ShapeMin (cutlass)   Vector< uint4_t, kLanes_ > (cutlass)   
  e  
-
HgemmTransformerA (cutlass::gemm)   is_trivially_copyable (cutlass::platform)   Shape (cutlass)   Vector (cutlass)   
HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   is_void (cutlass::platform)   ShapeAdd (cutlass)   Vector< half, kLanes_ > (cutlass)   
enable_if (cutlass::platform)   HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   is_volatile (cutlass::platform)   ShapeCount (cutlass)   Vectorize (cutlass)   
enable_if< false, T > (cutlass::platform)   HgemmTransformerB (cutlass::gemm)   is_volatile< volatile T > (cutlass::platform)   ShapeDiv (cutlass)   Vectorize< Element_, 1 > (cutlass)   
Extent (cutlass)   HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   PredicateVector::Iterator (cutlass)   ShapeMax (cutlass)   VectorTraits (cutlass)   
Extent< Vector< T, Lanes > > (cutlass)   HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   IteratorAdvance (cutlass)   ShapeMin (cutlass)   VectorTraits< Vector< T, Lanes > > (cutlass)   
Extent< Vector< T, Lanes > const > (cutlass)   
  i  
-
IteratorFragment (cutlass)   ShapeMul (cutlass)   VectorTraits< Vector< T, Lanes > const > (cutlass)   
IgemmEpilogueScalar< int > (cutlass::gemm)   Load< Vector< int4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > (cutlass)   ShapeMul (cutlass)   Vectorize (cutlass)   
IgemmEpilogueTraits (cutlass::gemm)   Load< Vector< uint4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > (cutlass)   ShapeScale (cutlass)   Vectorize< Vector< bin1_t, 32 >, kLanes_ > (cutlass)   
enable_if (cutlass::platform)   IgemmEpilogueTraitsHelper (cutlass::gemm)   log2_down (cutlass)   ShapeStrides (cutlass)   Vectorize< Vector< int4_t, 8 >, kLanes_ > (cutlass)   
enable_if< false, T > (cutlass::platform)   IgemmFloatToInt8Converter (cutlass::gemm)   log2_down< N, 1, Count > (cutlass)   ShapeSub (cutlass)   Vectorize< Vector< uint4_t, 8 >, kLanes_ > (cutlass)   
Extent (cutlass)   IgemmGlobalIteratorAb (cutlass::gemm)   log2_up (cutlass)   SharedLoadStream (cutlass::gemm)   VectorTraits (cutlass)   
Extent< Vector< T, Lanes > > (cutlass)   IgemmGlobalLoadTransformer (cutlass::gemm)   log2_up< N, 1, Count > (cutlass)   GemmEpilogueTraits::SharedStorage (cutlass::gemm)   VectorTraits< Vector< T, Lanes > > (cutlass)   
Extent< Vector< T, Lanes > const > (cutlass)   IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > (cutlass::gemm)   
  m  
+
GlobalLoadStreamPair::SharedStorage (cutlass::gemm)   VectorTraits< Vector< T, Lanes > const > (cutlass)   
  f  
-
  l  
-
ShapeScale (cutlass)   
  w  
+
IgemmGlobalStoreTransformer (cutlass::gemm)   GemmTraits::SharedStorage (cutlass::gemm)   
  w  
Identity (cutlass)   ShapeStrides (cutlass)   
Fragment (cutlass)   IdentityBlockSwizzle (cutlass::gemm)   less (cutlass::platform)   ShapeSub (cutlass)   WmmaGemmGlobalIteratorCd (cutlass::gemm)   
FragmentConstIterator (cutlass)   IgemmConfig (cutlass::gemm)   LinearScaling (cutlass::gemm)   GemmTraits::SharedLoadStream (cutlass::gemm)   WmmaGemmGlobalIteratorCdTraits (cutlass::gemm)   
FragmentIterator (cutlass)   IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > (cutlass::gemm)   Load (cutlass)   SharedLoadStream (cutlass::gemm)   
FragmentLoad (cutlass)   IgemmContiguousGlobalTileTraits (cutlass::gemm)   Load< double, 2, Memory_, true, 16 > (cutlass)   ClearAccumulators::SharedStorage (cutlass::gemm)   
FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogue (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 16 > (cutlass)   GemmEpilogueTraits::SharedStorage (cutlass::gemm)   
FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogue< GemmEpilogueTraits_, true > (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 4 > (cutlass)   GemmTraits::SharedStorage (cutlass::gemm)   
IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > (cutlass::gemm)   GemmTraits::MainLoopSharedStorage (cutlass::gemm)   GlobalLoadStream::SharedStorage (cutlass::gemm)   
Fp16SgemmConfig (cutlass::gemm)   IgemmGlobalTileTraits (cutlass::gemm)   MatrixCoord (cutlass)   ClearAccumulators::SharedStorage (cutlass::gemm)   WmmaGemmGlobalIteratorCd (cutlass::gemm)   
Fp16SgemmSgemmTraits (cutlass::gemm)   IgemmInt8ToFloatConverter (cutlass::gemm)   MatrixLayout (cutlass)   SharedStreamPair (cutlass::gemm)   WmmaGemmGlobalIteratorCdTraits (cutlass::gemm)   
Fragment (cutlass)   IgemmSharedStoreTransformer (cutlass::gemm)   MatrixTransform (cutlass)   SimplifiedGemmEpilogueTraits (cutlass::gemm)   
  z  
+
FragmentConstIterator (cutlass)   IgemmSwizzle (cutlass::gemm)   Max (cutlass)   SimplifiedGemmTraits (cutlass::gemm)   
FragmentElementType (cutlass)   IgemmTileTraitsHelperA (cutlass::gemm)   MemorySpace (cutlass)   SimplifiedGemmTraitsHelper (cutlass::gemm)   ZipConvert (cutlass)   
FragmentIterator (cutlass)   IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_, Index_ > (cutlass::gemm)   Min (cutlass)   sqrt_est (cutlass)   ZipFragment (cutlass)   
FragmentMultiplyAdd (cutlass::gemm)   IgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_, Index_ > (cutlass::gemm)   
  n  
+
StorageType (cutlass)   ZipTensorRef (cutlass)   
FragmentMultiplyAdd< half, half, true > (cutlass::gemm)   IgemmTileTraitsHelperB (cutlass::gemm)   StorageType< 1 > (cutlass)   ZipTileAllocation (cutlass)   
  g  
+
IgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_, Index_ > (cutlass::gemm)   nullptr_t (cutlass::platform)   StorageType< 2 > (cutlass)   ZipTileIterator (cutlass)   
IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_, Index_ > (cutlass::gemm)   
  p  
+
StorageType< 4 > (cutlass)   
Gemm (cutlass::gemm)   IgemmTraits (cutlass::gemm)   Store (cutlass)   
alignment_of::pad (cutlass::platform)   
-
a | b | c | d | e | f | g | h | i | l | m | n | p | r | s | t | u | v | w
+
a | b | c | d | e | f | g | h | i | k | l | m | n | p | r | s | t | u | v | w | z
diff --git a/docs/clear__accumulators_8h.html b/docs/clear__accumulators_8h.html index b4bd3b39c..cd8f6307a 100644 --- a/docs/clear__accumulators_8h.html +++ b/docs/clear__accumulators_8h.html @@ -82,7 +82,7 @@ $(function() {

Defines abstractions for efficiently clearing accumulator tiles. More...

-
#include <cutlass/vector.h>
+
#include "cutlass/vector.h"

Go to the source code of this file.

@@ -104,7 +104,7 @@ Namespaces diff --git a/docs/clear__accumulators_8h_source.html b/docs/clear__accumulators_8h_source.html index 1a6f517fb..7c0423a5f 100644 --- a/docs/clear__accumulators_8h_source.html +++ b/docs/clear__accumulators_8h_source.html @@ -76,16 +76,17 @@ $(function() {
clear_accumulators.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/vector.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_, int kLanes_ = 1>
40  struct SharedStorage {};
41 
43  CUTLASS_DEVICE ClearAccumulators(SharedStorage& shared_storage) {}
44 
46  template <typename Fragment_>
47  CUTLASS_DEVICE void clear(Fragment_& fragment) {
48  fragment.clear();
49  }
50 };
51 
53 
54 } // namespace gemm
55 } // namespace cutlass
Definition: convert.h:33
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include "cutlass/vector.h"
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_, int kLanes_ = 1>
40  struct SharedStorage {};
41 
43  CUTLASS_DEVICE ClearAccumulators(SharedStorage& shared_storage) {}
44 
46  CUTLASS_DEVICE ClearAccumulators() {}
47 
49  template <typename Fragment_>
50  CUTLASS_DEVICE void clear(Fragment_& fragment) {
51  fragment.clear();
52  }
53 };
54 
56 
57 } // namespace gemm
58 } // namespace cutlass
Definition: convert.h:33
Definition: clear_accumulators.h:38
CUTLASS_DEVICE ClearAccumulators(SharedStorage &shared_storage)
Ctor.
Definition: clear_accumulators.h:43
Defines a 1D vector of elements held in the registers of each thread.
-
CUTLASS_DEVICE void clear(Fragment_ &fragment)
Clear the fragment.
Definition: clear_accumulators.h:47
+
CUTLASS_DEVICE void clear(Fragment_ &fragment)
Clear the fragment.
Definition: clear_accumulators.h:50
The shared storage.
Definition: clear_accumulators.h:40
+
CUTLASS_DEVICE ClearAccumulators()
Ctor.
Definition: clear_accumulators.h:46
diff --git a/docs/complex_8h.html b/docs/complex_8h.html new file mode 100644 index 000000000..e94494d21 --- /dev/null +++ b/docs/complex_8h.html @@ -0,0 +1,263 @@ + + + + + + + +Cutlass: complex.h File Reference + + + + + + + + + + +
+
+
+ + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + + +
+ +
+
complex.h File Reference
+
+
+
#include <cuComplex.h>
+#include "cutlass/cutlass.h"
+#include <iosfwd>
+
+

Go to the source code of this file.

+ + + + +

+Classes

class  cutlass::platform::complex< T >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::platform
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

CUTLASS_HOST_DEVICE float const & cutlass::platform::real (cuFloatComplex const &z)
 Returns the real part of the complex number. More...
 
CUTLASS_HOST_DEVICE float & cutlass::platform::real (cuFloatComplex &z)
 Returns the real part of the complex number. More...
 
CUTLASS_HOST_DEVICE double const & cutlass::platform::real (cuDoubleComplex const &z)
 Returns the real part of the complex number. More...
 
CUTLASS_HOST_DEVICE double & cutlass::platform::real (cuDoubleComplex &z)
 Returns the real part of the complex number. More...
 
CUTLASS_HOST_DEVICE float const & cutlass::platform::imag (cuFloatComplex const &z)
 Returns the imaginary part of the complex number. More...
 
CUTLASS_HOST_DEVICE float & cutlass::platform::imag (cuFloatComplex &z)
 Returns the imaginary part of the complex number. More...
 
CUTLASS_HOST_DEVICE double const & cutlass::platform::imag (cuDoubleComplex const &z)
 Returns the imaginary part of the complex number. More...
 
CUTLASS_HOST_DEVICE double & cutlass::platform::imag (cuDoubleComplex &z)
 Returns the imaginary part of the complex number. More...
 
template<typename T >
CUTLASS_HOST_DEVICE T const & cutlass::platform::real (complex< T > const &z)
 Returns the real part of the complex number. More...
 
template<typename T >
CUTLASS_HOST_DEVICE T & cutlass::platform::real (complex< T > &z)
 Returns the real part of the complex number. More...
 
template<typename T >
CUTLASS_HOST_DEVICE T const & cutlass::platform::imag (complex< T > const &z)
 Returns the imaginary part of the complex number. More...
 
template<typename T >
CUTLASS_HOST_DEVICE T & cutlass::platform::imag (complex< T > &z)
 Returns the imaginary part of the complex number. More...
 
template<typename T >
std::ostream & cutlass::platform::operator<< (std::ostream &out, complex< T > const &z)
 
template<typename T >
CUTLASS_HOST_DEVICE bool cutlass::platform::operator== (complex< T > const &lhs, complex< T > const &rhs)
 Equality operator. More...
 
template<typename T >
CUTLASS_HOST_DEVICE bool cutlass::platform::operator!= (complex< T > const &lhs, complex< T > const &rhs)
 Inequality operator. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator+ (complex< T > const &lhs, complex< T > const &rhs)
 Addition. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator- (complex< T > const &lhs, complex< T > const &rhs)
 Subtraction. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator* (complex< T > const &lhs, complex< T > const &rhs)
 Multiplication. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator* (complex< T > const &lhs, T const &s)
 Scalar Multiplication. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator* (T const &s, complex< T > const &rhs)
 Scalar Multiplication. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator/ (complex< T > const &lhs, complex< T > const &rhs)
 Division. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator/ (complex< T > const &lhs, T const &s)
 Scalar Division. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::operator/ (T const &s, complex< T > const &rhs)
 Scalar divided by complex. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > & cutlass::platform::operator+= (complex< T > &lhs, complex< T > const &rhs)
 Addition. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > & cutlass::platform::operator-= (complex< T > &lhs, complex< T > const &rhs)
 Subtraction. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > & cutlass::platform::operator*= (complex< T > &lhs, complex< T > const &rhs)
 Multiplication. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > & cutlass::platform::operator*= (complex< T > &lhs, T s)
 Scalar multiplication. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > & cutlass::platform::operator/= (complex< T > &lhs, complex< T > const &rhs)
 Division. More...
 
template<typename T >
CUTLASS_HOST_DEVICEcutlass::platform::abs (complex< T > const &z)
 Returns the magnitude of the complex number. More...
 
template<typename T >
CUTLASS_HOST_DEVICEcutlass::platform::arg (complex< T > const &z)
 Returns the magnitude of the complex number. More...
 
template<typename T >
CUTLASS_HOST_DEVICEcutlass::platform::norm (complex< T > const &z)
 Returns the squared magnitude. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::conj (complex< T > const &z)
 Returns the complex conjugate. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::proj (complex< T > const &z)
 Projects the complex number z onto the Riemann sphere. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::polar (T const &r, T const &theta=T())
 Returns a complex number with magnitude r and phase theta. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::exp (complex< T > const &z)
 Computes the complex exponential of z. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::log (complex< T > const &z)
 Computes the complex exponential of z. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::log10 (complex< T > const &z)
 Computes the complex exponential of z. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::sqrt (complex< T > const &z)
 Computes the square root of complex number z. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::cos (complex< T > const &z)
 Computes the cosine of complex z. More...
 
template<typename T >
CUTLASS_HOST_DEVICE complex< T > cutlass::platform::sin (complex< T > const &z)
 Computes the sin of complex z. More...
 
+
+ + + + diff --git a/docs/complex_8h_source.html b/docs/complex_8h_source.html new file mode 100644 index 000000000..6270d22da --- /dev/null +++ b/docs/complex_8h_source.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: complex.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
complex.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 #pragma once
26 
27 #include <cuComplex.h>
28 #include "cutlass/cutlass.h"
29 #include <iosfwd>
30 
31 namespace cutlass {
32 namespace platform {
33 
35 
36 //
37 // Accessors for CUDA complex types
38 //
39 
41 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
42  // host-only type
44 float const &real(cuFloatComplex const &z) { return z.x; }
45 
47 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
48  // host-only type
50 float &real(cuFloatComplex &z) { return z.x; }
51 
53 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
54  // host-only type
56 double const &real(cuDoubleComplex const &z) { return z.x; }
57 
59 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
60  // host-only type
62 double &real(cuDoubleComplex &z) { return z.x; }
63 
65 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
66  // host-only type
68 float const &imag(cuFloatComplex const &z) { return z.y; }
69 
71 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
72  // host-only type
74 float &imag(cuFloatComplex &z) { return z.y; }
75 
77 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
78  // host-only type
80 double const &imag(cuDoubleComplex const &z) { return z.y; }
81 
83 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
84  // host-only type
86 double &imag(cuDoubleComplex &z) { return z.y; }
87 
89 
92 template <typename T>
93 class complex {
94  public:
96  typedef T value_type;
97 
98  private:
99  //
100  // Data members
101  //
102 
104  T _real;
105 
107  T _imag;
108 
109  public:
110 //
111 // Methods
112 //
113 
115 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
116  // host-only type
118  complex(T r = T(0), T i = T(0)) : _real(r), _imag(i) {}
119 
121 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
122  // host-only type
124  complex(cuFloatComplex const &z) : _real(platform::real(z)), _imag(platform::imag(z)) {}
125 
127 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
128  // host-only type
130  complex(cuDoubleComplex const &z) : _real(platform::real(z)), _imag(platform::imag(z)) {}
131 
133 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
134  // host-only type
136  T const &real() const { return _real; }
137 
139 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
140  // host-only type
142  T &real() { return _real; }
143 
145 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
146  // host-only type
148  T const &imag() const { return _imag; }
149 
151 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
152  // host-only type
154  T &imag() { return _imag; }
155 
157 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
158  // host-only type
160  operator cuFloatComplex() const { return make_cuFloatComplex(real(), imag()); }
161 
163 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
164  // host-only type
166  operator cuDoubleComplex() const { return make_cuDoubleComplex(real(), imag()); }
167 };
168 
169 //
170 // Accessors for complex template
171 //
172 
174 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
175  // host-only type
176 template <typename T>
177 CUTLASS_HOST_DEVICE T const &real(complex<T> const &z) {
178  return z.real();
179 }
180 
182 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
183  // host-only type
184 template <typename T>
186  return z.real();
187 }
188 
190 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
191  // host-only type
192 template <typename T>
193 CUTLASS_HOST_DEVICE T const &imag(complex<T> const &z) {
194  return z.imag();
195 }
196 
198 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
199  // host-only type
200 template <typename T>
202  return z.imag();
203 }
204 
205 //
206 // Output operators
207 //
208 
209 template <typename T>
210 std::ostream &operator<<(std::ostream &out, complex<T> const &z) {
211  T _r = real(z);
212  T _i = imag(z);
213  return out << _r << "+i" << _i;
214 }
215 
216 //
217 // Non-member operators defined for complex types
218 //
219 
221 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
222  // host-only type
223 template <typename T>
224 CUTLASS_HOST_DEVICE bool operator==(complex<T> const &lhs, complex<T> const &rhs) {
225  return real(lhs) == (rhs) && imag(lhs) == imag(rhs);
226 }
227 
229 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
230  // host-only type
231 template <typename T>
232 CUTLASS_HOST_DEVICE bool operator!=(complex<T> const &lhs, complex<T> const &rhs) {
233  return !(lhs == rhs);
234 }
235 
237 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
238  // host-only type
239 template <typename T>
241  return complex<T>(real(lhs) + real(rhs), imag(lhs) + imag(rhs));
242 }
243 
245 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
246  // host-only type
247 template <typename T>
249  return complex<T>(real(lhs) - real(rhs), imag(lhs) - imag(rhs));
250 }
251 
253 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
254  // host-only type
255 template <typename T>
257  return complex<T>(real(lhs) * real(rhs) - imag(lhs) * imag(rhs),
258  real(lhs) * imag(rhs) + imag(lhs) * real(rhs));
259 }
260 
262 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
263  // host-only type
264 template <typename T>
266  return complex<T>(real(lhs) * s, imag(lhs) * s);
267 }
268 
270 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
271  // host-only type
272 template <typename T>
274  return complex<T>(s * real(rhs), s * imag(rhs));
275 }
276 
278 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
279  // host-only type
280 template <typename T>
282  T d = (real(rhs) * (rhs) + imag(rhs) * imag(rhs));
283 
284  return complex<T>((real(lhs) * (rhs) + imag(lhs) * imag(rhs)) / d,
285  (imag(lhs) * (rhs)-real(lhs) * imag(rhs)) / d);
286 }
287 
289 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
290  // host-only type
291 template <typename T>
293  return complex<T>(real(lhs) / s, imag(lhs) / s);
294 }
295 
297 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
298  // host-only type
299 template <typename T>
301  T d = (real(rhs) * (rhs) + imag(rhs) * imag(rhs));
302 
303  return complex<T>((s * (rhs)) / d, -(s * imag(rhs)) / d);
304 }
305 
307 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
308  // host-only type
309 template <typename T>
311  lhs = (lhs + rhs);
312  return lhs;
313 }
314 
316 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
317  // host-only type
318 template <typename T>
320  lhs = (lhs - rhs);
321  return lhs;
322 }
323 
325 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
326  // host-only type
327 template <typename T>
329  lhs = (lhs * rhs);
330  return lhs;
331 }
332 
334 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
335  // host-only type
336 template <typename T>
338  lhs = (lhs * s);
339  return lhs;
340 }
341 
343 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
344  // host-only type
345 template <typename T>
347  lhs = (lhs / rhs);
348  return lhs;
349 }
350 
351 //
352 // Non-member functions defined for complex numbers
353 //
354 
356 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
357  // host-only type
358 template <typename T>
360  return sqrt(norm(z));
361 }
362 
364 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
365  // host-only type
366 template <typename T>
368  return atan2(imag(z), real(z));
369 }
370 
372 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
373  // host-only type
374 template <typename T>
376  return real(z) * real(z) + imag(z) * imag(z);
377 }
378 
380 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
381  // host-only type
382 template <typename T>
384  return complex<T>(real(z), -imag(z));
385 }
386 
388 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
389  // host-only type
390 template <typename T>
392  T d = real(z) * real(z) + imag(z) * imag(z) + T(1);
393  return complex<T>((T(2) * real(z)) / d, (T(2) * imag(z)) / d);
394 }
395 
397 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
398  // host-only type
399 template <typename T>
400 CUTLASS_HOST_DEVICE complex<T> polar(T const &r, T const &theta = T()) {
401  return complex<T>(r * cos(theta), r * sin(theta));
402 }
403 
405 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
406  // host-only type
407 template <typename T>
409  return complex<T>(real(z) * cos(imag(z)), real(z) * sin(imag(z)));
410 }
411 
413 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
414  // host-only type
415 template <typename T>
417  return complex<T>(log(abs(z)), arg(z));
418 }
419 
421 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
422  // host-only type
423 template <typename T>
425  return log(z) / T(log(T(10)));
426 }
427 
429 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
430  // host-only type
431 template <typename T>
433  return sqrt(T(2)) / T(2) *
434  complex<T>(sqrt(sqrt(norm(z)) + real(z)),
435  (imag(z) < 0 ? T(-1) : T(1)) * sqrt(sqrt(norm(z)) - real(z)));
436 }
437 
439 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
440  // host-only type
441 template <typename T>
443  return (exp(z) + exp(-z)) / T(2);
444 }
445 
447 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a
448  // host-only type
449 template <typename T>
451  return (exp(-z) - exp(z)) * complex<T>(T(0), T(1) / T(2));
452 }
453 
455 
456 } // namespace platform
457 } // namespace cutlass
CUTLASS_HOST_DEVICE complex< T > proj(complex< T > const &z)
Projects the complex number z onto the Riemann sphere.
Definition: complex.h:391
+
Definition: convert.h:33
+
CUTLASS_HOST_DEVICE T & imag()
Accesses the imaginary part of the complex number.
Definition: complex.h:154
+
CUTLASS_HOST_DEVICE bool operator==(complex< T > const &lhs, complex< T > const &rhs)
Equality operator.
Definition: complex.h:224
+
CUTLASS_HOST_DEVICE T const & imag() const
Accesses the imaginary part of the complex number.
Definition: complex.h:148
+
CUTLASS_HOST_DEVICE complex< T > operator*(complex< T > const &lhs, complex< T > const &rhs)
Multiplication.
Definition: complex.h:256
+
CUTLASS_HOST_DEVICE complex< T > & operator-=(complex< T > &lhs, complex< T > const &rhs)
Subtraction.
Definition: complex.h:319
+
CUTLASS_HOST_DEVICE complex< T > operator-(complex< T > const &lhs, complex< T > const &rhs)
Subtraction.
Definition: complex.h:248
+
CUTLASS_HOST_DEVICE T & real()
Accesses the real part of the complex number.
Definition: complex.h:142
+
CUTLASS_HOST_DEVICE float const & real(cuFloatComplex const &z)
Returns the real part of the complex number.
Definition: complex.h:44
+
CUTLASS_HOST_DEVICE complex< T > sin(complex< T > const &z)
Computes the sin of complex z.
Definition: complex.h:450
+
CUTLASS_HOST_DEVICE complex(cuFloatComplex const &z)
Conversion from cuFloatComplex.
Definition: complex.h:124
+
CUTLASS_HOST_DEVICE complex< T > cos(complex< T > const &z)
Computes the cosine of complex z.
Definition: complex.h:442
+
CUTLASS_HOST_DEVICE complex< T > operator+(complex< T > const &lhs, complex< T > const &rhs)
Addition.
Definition: complex.h:240
+
CUTLASS_HOST_DEVICE complex< T > polar(T const &r, T const &theta=T())
Returns a complex number with magnitude r and phase theta.
Definition: complex.h:400
+
CUTLASS_HOST_DEVICE T const & real() const
Accesses the real part of the complex number.
Definition: complex.h:136
+
CUTLASS_HOST_DEVICE complex< T > & operator/=(complex< T > &lhs, complex< T > const &rhs)
Division.
Definition: complex.h:346
+
CUTLASS_HOST_DEVICE complex< T > sqrt(complex< T > const &z)
Computes the square root of complex number z.
Definition: complex.h:432
+
CUTLASS_HOST_DEVICE complex< T > & operator+=(complex< T > &lhs, complex< T > const &rhs)
Addition.
Definition: complex.h:310
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_HOST_DEVICE float const & imag(cuFloatComplex const &z)
Returns the imaginary part of the complex number.
Definition: complex.h:68
+
CUTLASS_HOST_DEVICE complex< T > exp(complex< T > const &z)
Computes the complex exponential of z.
Definition: complex.h:408
+
CUTLASS_HOST_DEVICE complex< T > log10(complex< T > const &z)
Computes the complex exponential of z.
Definition: complex.h:424
+
CUTLASS_HOST_DEVICE T norm(complex< T > const &z)
Returns the squared magnitude.
Definition: complex.h:375
+
CUTLASS_HOST_DEVICE bool operator!=(complex< T > const &lhs, complex< T > const &rhs)
Inequality operator.
Definition: complex.h:232
+
CUTLASS_HOST_DEVICE T abs(complex< T > const &z)
Returns the magnitude of the complex number.
Definition: complex.h:359
+
CUTLASS_HOST_DEVICE complex< T > & operator*=(complex< T > &lhs, complex< T > const &rhs)
Multiplication.
Definition: complex.h:328
+
CUTLASS_HOST_DEVICE complex(cuDoubleComplex const &z)
Conversion from cuDoubleComplex.
Definition: complex.h:130
+
CUTLASS_HOST_DEVICE T arg(complex< T > const &z)
Returns the magnitude of the complex number.
Definition: complex.h:367
+
CUTLASS_HOST_DEVICE complex(T r=T(0), T i=T(0))
Constructor.
Definition: complex.h:118
+
Definition: complex.h:93
+
CUTLASS_HOST_DEVICE complex< T > log(complex< T > const &z)
Computes the complex exponential of z.
Definition: complex.h:416
+
T value_type
Type alias for scalar type.
Definition: complex.h:96
+
Basic include for CUTLASS macros.
+
CUTLASS_HOST_DEVICE complex< T > operator/(complex< T > const &lhs, complex< T > const &rhs)
Division.
Definition: complex.h:281
+
CUTLASS_HOST_DEVICE complex< T > conj(complex< T > const &z)
Returns the complex conjugate.
Definition: complex.h:383
+
+ + + + diff --git a/docs/convert_8h.html b/docs/convert_8h.html index 422c52017..cd3bf4bb8 100644 --- a/docs/convert_8h.html +++ b/docs/convert_8h.html @@ -82,7 +82,7 @@ $(function() {

Defines conversion operations among Fragments of different base type. More...

-
#include <cutlass/fragment.h>
+
#include "cutlass/fragment.h"

Go to the source code of this file.

@@ -103,7 +103,7 @@ Namespaces diff --git a/docs/convert_8h_source.html b/docs/convert_8h_source.html index 6e877d293..22ec9d4b8 100644 --- a/docs/convert_8h_source.html +++ b/docs/convert_8h_source.html @@ -76,7 +76,7 @@ $(function() {
convert.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/fragment.h>
32 
33 namespace cutlass {
34 
36 
37 template <typename InputFragment_, typename OutputFragment_>
38 struct Convert {};
39 
41 
42 template <typename InputScalar_, typename OutputScalar_, int kScalars_>
43 struct Convert<Fragment<InputScalar_, kScalars_>, Fragment<OutputScalar_, kScalars_> > {
48 
50  CUTLASS_DEVICE Convert() {}
51 
53  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
54  transform(src, 0, dst);
55  }
56 
58  template <typename Fragment_>
59  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
60  for (int i = 0; i < kScalars_; ++i) {
61  dst[i] = static_cast<OutputScalar_>(src[i + offset]);
62  }
63  }
64 };
65 
67 
68 template <typename Fragment_>
69 struct Copy {
71  typedef Fragment_ InputFragment;
73  typedef Fragment_ OutputFragment;
74 
76  CUTLASS_DEVICE Copy() {}
77 
79  CUTLASS_DEVICE void transform(Fragment_ const& src, Fragment_& dst) { transform(src, 0, dst); }
80 
82  template <typename InputFragment_>
83  CUTLASS_DEVICE void transform(InputFragment_ const& src, int offset, Fragment_& dst) {
84  if (sizeof(typename Fragment_::Element) == 8) {
85  uint64_t const* src_ptr = reinterpret_cast<uint64_t const*>(&src[offset]);
86  uint64_t* dst_ptr = reinterpret_cast<uint64_t*>(&dst[0]);
87  for (int i = 0; i < sizeof(Fragment_) / 8; ++i) {
88  dst_ptr[i] = src_ptr[i];
89  }
90  } else {
91  uint32_t const* src_ptr = reinterpret_cast<uint32_t const*>(&src[offset]);
92  uint32_t* dst_ptr = reinterpret_cast<uint32_t*>(&dst[0]);
93  for (int i = 0; i < sizeof(Fragment_) / 4; ++i) {
94  dst_ptr[i] = src_ptr[i];
95  }
96  }
97  }
98 };
99 
101 
102 } // namespace cutlass
Definition: convert.h:33
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include "cutlass/fragment.h"
32 
33 namespace cutlass {
34 
36 
37 template <typename InputFragment_, typename OutputFragment_>
38 struct Convert {};
39 
41 
42 template <typename InputScalar_, typename OutputScalar_, int kScalars_>
43 struct Convert<Fragment<InputScalar_, kScalars_>, Fragment<OutputScalar_, kScalars_> > {
48 
50  CUTLASS_DEVICE Convert() {}
51 
53  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
54  transform(src, 0, dst);
55  }
56 
58  template <typename Fragment_>
59  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
60  for (int i = 0; i < kScalars_; ++i) {
61  dst[i] = static_cast<OutputScalar_>(src[i + offset]);
62  }
63  }
64 };
65 
67 
68 template <typename Fragment_>
69 struct Copy {
71  typedef Fragment_ InputFragment;
73  typedef Fragment_ OutputFragment;
74 
76  CUTLASS_DEVICE Copy() {}
77 
79  CUTLASS_DEVICE void transform(Fragment_ const& src, Fragment_& dst) { transform(src, 0, dst); }
80 
82  template <typename InputFragment_>
83  CUTLASS_DEVICE void transform(InputFragment_ const& src, int offset, Fragment_& dst) {
84  if (sizeof(typename Fragment_::Element) == 8) {
85  uint64_t const* src_ptr = reinterpret_cast<uint64_t const*>(&src[offset]);
86  uint64_t* dst_ptr = reinterpret_cast<uint64_t*>(&dst[0]);
87  for (int i = 0; i < sizeof(Fragment_) / 8; ++i) {
88  dst_ptr[i] = src_ptr[i];
89  }
90  } else {
91  uint32_t const* src_ptr = reinterpret_cast<uint32_t const*>(&src[offset]);
92  uint32_t* dst_ptr = reinterpret_cast<uint32_t*>(&dst[0]);
93  for (int i = 0; i < sizeof(Fragment_) / 4; ++i) {
94  dst_ptr[i] = src_ptr[i];
95  }
96  }
97  }
98 };
99 
101 
102 } // namespace cutlass
Definition: convert.h:33
Fragment< OutputScalar_, kScalars_ > OutputFragment
The output fragment.
Definition: convert.h:47
Definition: convert.h:69
CUTLASS_DEVICE void transform(Fragment_ const &src, Fragment_ &dst)
Transform a fragment.
Definition: convert.h:79
@@ -94,7 +94,7 @@ $(function() {
diff --git a/docs/coord_8h.html b/docs/coord_8h.html index 516503867..8bb9bea4d 100644 --- a/docs/coord_8h.html +++ b/docs/coord_8h.html @@ -83,7 +83,8 @@ $(function() {

A Coord is a coordinate of arbitrary rank into a tensor or matrix. More...

-
@@ -92,7 +93,7 @@ Classes - +
struct  cutlass::Identity
 Describes identity elements. More...
 
struct  cutlass::Coord< N_ >
struct  cutlass::Coord< Rank_, Index_ >
 Statically-sized array specifying Coords within a tensor. More...
 
@@ -115,23 +116,14 @@ Functions - - - - - - - - - - - - + + +
CUTLASS_HOST_DEVICE Coord< 4 > cutlass::make_Coord (int _0, int _1, int _2, int _3)
 Helper to make a 4-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::get_Coord_hw (Coord< 3 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::get_Coord_hw (Coord< 4 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::get_Coord_hwc (Coord< 4 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::get_Coord_dhw (Coord< 4 > const &coord)
 Getter. More...
 
template<typename Shape_ >
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::make_Coord_from_shape ()
 
diff --git a/docs/coord_8h_source.html b/docs/coord_8h_source.html index 71ec92e1a..b0e2162cc 100644 --- a/docs/coord_8h_source.html +++ b/docs/coord_8h_source.html @@ -76,50 +76,54 @@ $(function() {
coord.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/cutlass.h>
32 
33 namespace cutlass {
34 
36 
38 struct Identity {
41  enum Kind { Additive = 0, Multiplicative = 1 };
42 };
43 
45 
47 template <int N_>
48 struct Coord {
49  //
50  // Type and constant definitions
51  //
52 
53  static int const N = N_;
54 
55  //
56  // Data members
57  //
58 
60  int idx[N];
61 
62  //
63  // Methods
64  //
65 
68  Coord(int value = 0) {
69  for (int i = 0; i < N; ++i) {
70  idx[i] = value;
71  }
72  }
73 
76  Coord(int _idx[]) {
77  for (int i = 0; i < N; ++i) {
78  idx[i] = _idx[i];
79  }
80  }
81 
84  Coord operator+(Coord const& b) const {
85  Coord c;
86  for (int i = 0; i < N; ++i) {
87  c.idx[i] = idx[i] + b.idx[i];
88  }
89  return c;
90  }
91 
94  Coord operator-(Coord const& b) const {
95  Coord c;
96  for (int i = 0; i < N; ++i) {
97  c.idx[i] = idx[i] - b.idx[i];
98  }
99  return c;
100  }
101 
104  Coord operator*(Coord const& b) const {
105  Coord c;
106  for (int i = 0; i < N; ++i) {
107  c.idx[i] = idx[i] * b.idx[i];
108  }
109  return c;
110  }
111 
114  Coord operator/(Coord const& b) const {
115  Coord c;
116  for (int i = 0; i < N; ++i) {
117  c.idx[i] = idx[i] / b.idx[i];
118  }
119  return c;
120  }
121 
124  Coord& operator+=(Coord const& b) {
125  for (int i = 0; i < N; ++i) {
126  idx[i] += b.idx[i];
127  }
128  return *this;
129  }
130 
133  Coord& operator-=(Coord const& b) {
134  for (int i = 0; i < N; ++i) {
135  idx[i] -= b.idx[i];
136  }
137  return *this;
138  }
139 
142  Coord& operator*=(Coord const& b) {
143  for (int i = 0; i < N; ++i) {
144  idx[i] *= b.idx[i];
145  }
146  return *this;
147  }
148 
151  Coord& operator/=(Coord const& b) {
152  for (int i = 0; i < N; ++i) {
153  idx[i] /= b.idx[i];
154  }
155  return *this;
156  }
157 
159  CUTLASS_HOST_DEVICE int& operator[](int dim) { return idx[dim]; }
160 
162  CUTLASS_HOST_DEVICE int const& operator[](int dim) const { return idx[dim]; }
163 
165  template <typename T>
166  CUTLASS_HOST_DEVICE T dot(Coord const& b, T sum) const {
167  for (int i = 0; i < N; ++i) {
168  sum += idx[i] * b.idx[i];
169  }
170  return sum;
171  }
172 
174  template <typename T>
175  CUTLASS_HOST_DEVICE T dot(Coord const& b) const {
176  T sum = T(0);
177  for (int i = 0; i < N; ++i) {
178  sum += idx[i] * b.idx[i];
179  }
180  return sum;
181  }
182 
184  template <int Dim>
186  return idx[Dim];
187  }
188 
191  int& at(int dim) { return idx[dim]; }
192 
194  template <int Dim>
195  CUTLASS_HOST_DEVICE int const& at() const {
196  return idx[Dim];
197  }
198 
201  int const& at(int dim) const { return idx[dim]; }
202 
205  bool operator==(Coord<N> const& b) const {
206  bool equal = true;
207  for (int i = 0; equal && i < N; ++i) {
208  equal = (idx[i] == b.idx[i]);
209  }
210  return equal;
211  }
212 
215  bool operator!=(Coord<N> const& b) const { return !(*this == b); }
216 
219  Coord& clamp(Coord<N> const& max, Coord<N> const& min = Coord<N>()) {
220  for (int i = 0; i < N; ++i) {
221  idx[i] = __NV_STD_MAX(__NV_STD_MIN(idx[i], max.idx[i]), min.idx[i]);
222  }
223  return *this;
224  }
225 
228  int count() const {
229  int product = idx[0];
230  for (int i = 1; i < N; ++i) {
231  product *= idx[i];
232  }
233  return product;
234  }
235 };
236 
238 
242  int values[1] = {_0};
243  return Coord<1>(values);
244 }
245 
248 Coord<2> make_Coord(int _0, int _1) {
249  int values[2] = {_0, _1};
250  return Coord<2>(values);
251 }
252 
255 Coord<3> make_Coord(int _0, int _1, int _2) {
256  int values[3] = {_0, _1, _2};
257  return Coord<3>(values);
258 }
259 
262 Coord<4> make_Coord(int _0, int _1, int _2, int _3) {
263  int values[4] = {_0, _1, _2, _3};
264  return Coord<4>(values);
265 }
266 
268 
271 Coord<2> get_Coord_hw(Coord<3> const& coord) { return make_Coord(coord[1], coord[2]); }
272 
275 Coord<2> get_Coord_hw(Coord<4> const& coord) { return make_Coord(coord[1], coord[2]); }
276 
279 Coord<3> get_Coord_hwc(Coord<4> const& coord) { return make_Coord(coord[1], coord[2], coord[3]); }
280 
283 Coord<3> get_Coord_dhw(Coord<4> const& coord) { return make_Coord(coord[0], coord[1], coord[2]); }
284 
286 
287 } // namespace cutlass
CUTLASS_HOST_DEVICE int const & operator[](int dim) const
Member access operator.
Definition: coord.h:162
-
CUTLASS_HOST_DEVICE int count() const
Returns the product of all elements.
Definition: coord.h:228
-
Describes identity elements.
Definition: coord.h:38
-
CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
std::max
Definition: platform.h:207
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include "cutlass/cutlass.h"
32 #include "cutlass/util/platform.h"
33 
34 namespace cutlass {
35 
37 
39 struct Identity {
42  enum Kind { Additive = 0, Multiplicative = 1 };
43 };
44 
46 
48 template <int Rank_, typename Index_ = int>
49 struct Coord {
50  //
51  // Type and constant definitions
52  //
53 
55  static int const kRank = Rank_;
56 
58  static int const N = Rank_;
59 
61  typedef Index_ Index;
62 
63  //
64  // Data members
65  //
66 
69 
70  //
71  // Methods
72  //
73 
76  Coord(Index value = 0) {
77  for (int i = 0; i < kRank; ++i) {
78  idx[i] = value;
79  }
80  }
81 
84  Coord(Index _idx[]) {
85  for (int i = 0; i < kRank; ++i) {
86  idx[i] = _idx[i];
87  }
88  }
89 
92  Coord(Coord<kRank> const &coord) {
93  for (int i = 0; i < kRank; ++i) {
94  idx[i] = coord[i];
95  }
96  }
97 
100  template <int Slice>
102  Coord<Slice> slice(int start = 0, Index identity = 0) const {
103  Coord<Slice> result;
104  for (int i = 0; i < Slice; ++i) {
105  if (i + start < kRank) {
106  slice[i] = idx[i + start];
107  }
108  else {
109  slice[i] = identity;
110  }
111  }
112  return result;
113  }
114 
117  operator bool() const {
118  for (int i = 0; i < kRank; ++i) {
119  if (idx[i]) {
120  return true;
121  }
122  }
123  return false;
124  }
125 
128  bool operator!() const {
129  for (int i = 0; i < kRank; ++i) {
130  if (idx[i]) {
131  return false;
132  }
133  }
134  return true;
135  }
136 
139  Coord operator+(Coord const& b) const {
140  Coord c;
141  for (int i = 0; i < kRank; ++i) {
142  c.idx[i] = idx[i] + b.idx[i];
143  }
144  return c;
145  }
146 
149  Coord operator-(Coord const& b) const {
150  Coord c;
151  for (int i = 0; i < kRank; ++i) {
152  c.idx[i] = idx[i] - b.idx[i];
153  }
154  return c;
155  }
156 
159  Coord operator*(Coord const& b) const {
160  Coord c;
161  for (int i = 0; i < kRank; ++i) {
162  c.idx[i] = idx[i] * b.idx[i];
163  }
164  return c;
165  }
166 
169  Coord operator/(Coord const& b) const {
170  Coord c;
171  for (int i = 0; i < kRank; ++i) {
172  c.idx[i] = idx[i] / b.idx[i];
173  }
174  return c;
175  }
176 
179  Coord& operator+=(Coord const& b) {
180  for (int i = 0; i < kRank; ++i) {
181  idx[i] += b.idx[i];
182  }
183  return *this;
184  }
185 
188  Coord& operator-=(Coord const& b) {
189  for (int i = 0; i < kRank; ++i) {
190  idx[i] -= b.idx[i];
191  }
192  return *this;
193  }
194 
197  Coord& operator*=(Coord const& b) {
198  for (int i = 0; i < kRank; ++i) {
199  idx[i] *= b.idx[i];
200  }
201  return *this;
202  }
203 
206  Coord& operator/=(Coord const& b) {
207  for (int i = 0; i < kRank; ++i) {
208  idx[i] /= b.idx[i];
209  }
210  return *this;
211  }
212 
214  CUTLASS_HOST_DEVICE Index& operator[](int dim) { return idx[dim]; }
215 
217  CUTLASS_HOST_DEVICE Index const& operator[](int dim) const { return idx[dim]; }
218 
220  template <typename T>
221  CUTLASS_HOST_DEVICE T dot(Coord const& b, T sum) const {
222  for (int i = 0; i < kRank; ++i) {
223  sum += idx[i] * b.idx[i];
224  }
225  return sum;
226  }
227 
229  template <typename T>
230  CUTLASS_HOST_DEVICE T dot(Coord const& b) const {
231  T sum = T(0);
232  for (int i = 0; i < kRank; ++i) {
233  sum += idx[i] * b.idx[i];
234  }
235  return sum;
236  }
237 
239  template <int Dim>
241  return idx[Dim];
242  }
243 
246  Index& at(int dim) { return idx[dim]; }
247 
249  template <int Dim>
250  CUTLASS_HOST_DEVICE Index const& at() const {
251  return idx[Dim];
252  }
253 
256  Index const& at(int dim) const { return idx[dim]; }
257 
260  bool operator==(Coord<kRank> const& b) const {
261  bool equal = true;
262  for (int i = 0; equal && i < kRank; ++i) {
263  equal = (idx[i] == b.idx[i]);
264  }
265  return equal;
266  }
267 
270  bool operator!=(Coord<kRank> const& b) const { return !(*this == b); }
271 
275  for (int i = 0; i < kRank; ++i) {
276  idx[i] = __NV_STD_MAX(__NV_STD_MIN(idx[i], max.idx[i]), min.idx[i]);
277  }
278  return *this;
279  }
280 
283  Index count() const {
284  Index product = idx[0];
285  for (int i = 1; i < kRank; ++i) {
286  product *= idx[i];
287  }
288  return product;
289  }
290 
293  bool operator<(Coord<kRank> const &b) const {
294  for (int i = 0; i < kRank; ++i) {
295  if (!(idx[i] < b[i])) {
296  return false;
297  }
298  }
299  return true;
300  }
301 
304  bool operator<=(Coord<kRank> const &b) const {
305  for (int i = 0; i < kRank; ++i) {
306  if (!(idx[i] <= b[i])) {
307  return false;
308  }
309  }
310  return true;
311  }
312 };
313 
315 
319  int values[1] = {_0};
320  return Coord<1>(values);
321 }
322 
325 Coord<2> make_Coord(int _0, int _1) {
326  int values[2] = {_0, _1};
327  return Coord<2>(values);
328 }
329 
332 Coord<3> make_Coord(int _0, int _1, int _2) {
333  int values[3] = {_0, _1, _2};
334  return Coord<3>(values);
335 }
336 
339 Coord<4> make_Coord(int _0, int _1, int _2, int _3) {
340  int values[4] = {_0, _1, _2, _3};
341  return Coord<4>(values);
342 }
343 
345 
346 template <typename Shape_>
348  return make_Coord(Shape_::kD, Shape_::kH, Shape_::kW);
349 }
350 
352 
353 } // namespace cutlass
Describes identity elements.
Definition: coord.h:39
+
CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
std::max
Definition: platform.h:215
Definition: convert.h:33
-
CUTLASS_HOST_DEVICE bool operator==(Coord< N > const &b) const
Determines if two Coord<> objects are equal.
Definition: coord.h:205
-
CUTLASS_HOST_DEVICE Coord & operator+=(Coord const &b)
In-place addition.
Definition: coord.h:124
-
CUTLASS_HOST_DEVICE bool operator!=(Coord< N > const &b) const
Not equal.
Definition: coord.h:215
-
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 2-element coordinate.
Definition: coord.h:241
-
CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_hwc(Coord< 4 > const &coord)
Getter.
Definition: coord.h:279
-
CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_dhw(Coord< 4 > const &coord)
Getter.
Definition: coord.h:283
-
CUTLASS_HOST_DEVICE Coord & clamp(Coord< N > const &max, Coord< N > const &min=Coord< N >())
Clamps a coordinate to a range specified by maximum and minimum values.
Definition: coord.h:219
-
CUTLASS_HOST_DEVICE int const & at() const
Gets the index of a given Coord element.
Definition: coord.h:195
-
CUTLASS_HOST_DEVICE Coord operator/(Coord const &b) const
Element-wise division.
Definition: coord.h:114
-
Kind
Definition: coord.h:41
-
CUTLASS_HOST_DEVICE T dot(Coord const &b, T sum) const
Computes the dot product of two Coord instances.
Definition: coord.h:166
-
CUTLASS_HOST_DEVICE Coord(int _idx[])
Constructs from an array of integers.
Definition: coord.h:76
-
#define __NV_STD_MAX(a, b)
Select maximum(a, b)
Definition: platform.h:155
-
CUTLASS_HOST_DEVICE int & at(int dim)
Access via index; may limit unrolling potential.
Definition: coord.h:191
-
CUTLASS_HOST_DEVICE int & operator[](int dim)
Member access operator.
Definition: coord.h:159
-
CUTLASS_HOST_DEVICE Coord & operator-=(Coord const &b)
In-place subtraction.
Definition: coord.h:133
-
CUTLASS_HOST_DEVICE Coord operator*(Coord const &b) const
Element-wise multiplication.
Definition: coord.h:104
-
CUTLASS_HOST_DEVICE Coord(int value=0)
Default ctor initializes uniformly.
Definition: coord.h:68
-
CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw(Coord< 3 > const &coord)
Getter.
Definition: coord.h:271
-
static int const N
Definition: coord.h:53
-
#define __NV_STD_MIN(a, b)
Select minimum(a, b)
Definition: platform.h:160
-
CUTLASS_HOST_DEVICE T dot(Coord const &b) const
Computes the dot product of two Coord instances.
Definition: coord.h:175
-
CUTLASS_HOST_DEVICE Coord operator-(Coord const &b) const
Element-wise subtraction.
Definition: coord.h:94
+
CUTLASS_HOST_DEVICE Coord operator-(Coord const &b) const
Element-wise subtraction.
Definition: coord.h:149
+
CUTLASS_HOST_DEVICE Index const & at(int dim) const
Access via index; may limit unrolling potential.
Definition: coord.h:256
+
CUTLASS_HOST_DEVICE Index const & operator[](int dim) const
Member access operator.
Definition: coord.h:217
+
CUTLASS_HOST_DEVICE Coord operator/(Coord const &b) const
Element-wise division.
Definition: coord.h:169
+
CUTLASS_HOST_DEVICE Index & operator[](int dim)
Member access operator.
Definition: coord.h:214
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 2-element coordinate.
Definition: coord.h:318
+
static int const kRank
Number of elements in Coord.
Definition: coord.h:55
+
Index_ Index
Index type used to store elements.
Definition: coord.h:61
+
CUTLASS_HOST_DEVICE Coord & operator*=(Coord const &b)
In-place multiplication.
Definition: coord.h:197
+
CUTLASS_HOST_DEVICE Index & at(int dim)
Access via index; may limit unrolling potential.
Definition: coord.h:246
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
CUTLASS_HOST_DEVICE Index count() const
Returns the product of all elements.
Definition: coord.h:283
+
CUTLASS_HOST_DEVICE Coord operator*(Coord const &b) const
Element-wise multiplication.
Definition: coord.h:159
+
Kind
Definition: coord.h:42
+
CUTLASS_HOST_DEVICE Coord< 3 > make_Coord_from_shape()
Definition: coord.h:347
+
CUTLASS_HOST_DEVICE bool operator==(Coord< kRank > const &b) const
Determines if two Coord<> objects are equal.
Definition: coord.h:260
+
static int const N
Number of elements in Coord, aliased for compatibility.
Definition: coord.h:58
+
#define __NV_STD_MAX(a, b)
Select maximum(a, b)
Definition: platform.h:163
+
Index idx[kRank]
Indices.
Definition: coord.h:68
+
#define __NV_STD_MIN(a, b)
Select minimum(a, b)
Definition: platform.h:168
+
CUTLASS_HOST_DEVICE Coord & operator-=(Coord const &b)
In-place subtraction.
Definition: coord.h:188
+
CUTLASS_HOST_DEVICE Coord & operator+=(Coord const &b)
In-place addition.
Definition: coord.h:179
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
-
CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
std::min
Definition: platform.h:201
-
Definition: coord.h:41
-
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
-
CUTLASS_HOST_DEVICE int & at()
Gets the index of a given Coord element.
Definition: coord.h:185
-
int idx[N]
Indices.
Definition: coord.h:60
-
Definition: coord.h:41
-
CUTLASS_HOST_DEVICE int const & at(int dim) const
Access via index; may limit unrolling potential.
Definition: coord.h:201
+
CUTLASS_HOST_DEVICE bool operator!=(Coord< kRank > const &b) const
Not equal.
Definition: coord.h:270
+
CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
std::min
Definition: platform.h:209
+
CUTLASS_HOST_DEVICE Index & at()
Gets the index of a given Coord element.
Definition: coord.h:240
+
CUTLASS_HOST_DEVICE Coord & operator/=(Coord const &b)
In-place division.
Definition: coord.h:206
+
Definition: coord.h:42
+
CUTLASS_HOST_DEVICE Coord< Slice > slice(int start=0, Index identity=0) const
Definition: coord.h:102
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:49
+
CUTLASS_HOST_DEVICE Index const & at() const
Gets the index of a given Coord element.
Definition: coord.h:250
+
CUTLASS_HOST_DEVICE T dot(Coord const &b, T sum) const
Computes the dot product of two Coord instances.
Definition: coord.h:221
+
CUTLASS_HOST_DEVICE Coord(Index value=0)
Default ctor initializes uniformly.
Definition: coord.h:76
+
Definition: coord.h:42
+
CUTLASS_HOST_DEVICE Coord & clamp(Coord< kRank > const &max, Coord< kRank > const &min=Coord< kRank >())
Clamps a coordinate to a range specified by maximum and minimum values.
Definition: coord.h:274
+
CUTLASS_HOST_DEVICE Coord(Index _idx[])
Constructs from an array of integers.
Definition: coord.h:84
+
CUTLASS_HOST_DEVICE T dot(Coord const &b) const
Computes the dot product of two Coord instances.
Definition: coord.h:230
+
CUTLASS_HOST_DEVICE Coord operator+(Coord const &b) const
Element-wise addition.
Definition: coord.h:139
Basic include for CUTLASS macros.
-
CUTLASS_HOST_DEVICE Coord & operator*=(Coord const &b)
In-place multiplication.
Definition: coord.h:142
-
CUTLASS_HOST_DEVICE Coord operator+(Coord const &b) const
Element-wise addition.
Definition: coord.h:84
-
CUTLASS_HOST_DEVICE Coord & operator/=(Coord const &b)
In-place division.
Definition: coord.h:151
+
CUTLASS_HOST_DEVICE Coord(Coord< kRank > const &coord)
Constructs from an array of integers.
Definition: coord.h:92
+
CUTLASS_HOST_DEVICE bool operator!() const
Returns true if Coord is uniformly zero.
Definition: coord.h:128
diff --git a/docs/core__io_8h.html b/docs/core__io_8h.html index d71c39716..2f50d7851 100644 --- a/docs/core__io_8h.html +++ b/docs/core__io_8h.html @@ -73,6 +73,8 @@ $(function() {
core_io.h File Reference
@@ -83,51 +85,56 @@ $(function() { More...

#include <iosfwd>
#include <typeinfo>
-#include <cutlass/coord.h>
+#include "cutlass/coord.h"
+#include "cutlass/vector.h"

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::ScalarIO< T >
 Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...
 
+ + + +

+Namespaces

 cutlass
 
- - - + + + + + + + + + + + + + + + + + + + + + + + + + + +

Functions

template<int Rank>
std::ostream & operator<< (std::ostream &out, cutlass::Coord< Rank > const &coord)
 
template<int Rank>
std::ostream & cutlass::operator<< (std::ostream &out, Coord< Rank > const &coord)
 
template<typename T >
std::ostream & cutlass::operator<< (std::ostream &out, ScalarIO< T > const &scalar)
 Default printing to ostream. More...
 
template<>
std::ostream & cutlass::operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar)
 Printing to ostream of int8_t as integer rather than character. More...
 
template<>
std::ostream & cutlass::operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar)
 Printing to ostream of uint8_t as integer rather than character. More...
 
template<>
std::ostream & cutlass::operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::bin1_t, 32 > > const &scalar)
 Printing to ostream of vector of 1b elements. More...
 
template<>
std::ostream & cutlass::operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::int4_t, 8 > > const &scalar)
 Printing to ostream of vector of 4b signed integer elements. More...
 
template<>
std::ostream & cutlass::operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::uint4_t, 8 > > const &scalar)
 Printing to ostream of vector of 4b unsigned integer elements. More...
 
-

Function Documentation

- -

◆ operator<<()

- -
-
-
-template<int Rank>
- - - - - - - - - - - - - - - - - - -
std::ostream& operator<< (std::ostream & out,
cutlass::Coord< Rank > const & coord 
)
-
- -
-
diff --git a/docs/core__io_8h_source.html b/docs/core__io_8h_source.html index 7c076c94d..21b790113 100644 --- a/docs/core__io_8h_source.html +++ b/docs/core__io_8h_source.html @@ -76,11 +76,19 @@ $(function() {
core_io.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 #pragma once
26 
31 #pragma once
32 
33 #include <iosfwd>
34 #include <typeinfo>
35 
36 #include <cutlass/coord.h>
37 
38 template <int Rank>
39 std::ostream& operator<<(std::ostream& out, cutlass::Coord<Rank> const& coord) {
40  for (int i = 0; i < Rank; ++i) {
41  out << (i ? ", " : "") << coord.idx[i];
42  }
43  return out;
44 }
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <iosfwd>
32 #include <typeinfo>
33 
34 #include "cutlass/coord.h"
35 #include "cutlass/vector.h"
36 
37 namespace cutlass {
38 
40 
41 template <int Rank>
42 std::ostream& operator<<(std::ostream& out, Coord<Rank> const& coord) {
43  for (int i = 0; i < Rank; ++i) {
44  out << (i ? ", " : "") << coord.idx[i];
45  }
46  return out;
47 }
48 
50 
52 template <typename T>
53 struct ScalarIO {
54 
56  T value;
57 
59  ScalarIO() { }
60 
63 };
64 
66 
68 template <typename T>
69 inline std::ostream &operator<<(std::ostream &out, ScalarIO<T> const &scalar) {
70  return out << scalar.value;
71 }
72 
74 template <>
75 inline std::ostream &operator<<(std::ostream &out, ScalarIO<int8_t> const &scalar) {
76  return out << int(scalar.value);
77 }
78 
80 template <>
81 inline std::ostream &operator<<(std::ostream &out, ScalarIO<uint8_t> const &scalar) {
82  return out << unsigned(scalar.value);
83 }
84 
86 template <>
87 inline std::ostream &operator<<(
88  std::ostream &out,
90 
91  for (int i = 0; i < 32; i++) {
92  out << int(scalar.value[i]);
93  out << ((i != 31) ? ", " : "");
94  }
95  return out;
96 }
97 
99 template <>
100 inline std::ostream &operator<<(
101  std::ostream &out,
103 
104  for (int i = 0; i < 8; i++) {
105  out << int(scalar.value[i]);
106  out << ((i != 7) ? ", " : "");
107  }
108  return out;
109 }
110 
112 template <>
113 inline std::ostream &operator<<(
114  std::ostream &out,
116 
117  for (int i = 0; i < 8; i++) {
118  out << unsigned(scalar.value[i]);
119  out << ((i != 7) ? ", " : "");
120  }
121  return out;
122 }
123 
125 
126 } // namespace cutlass
Definition: convert.h:33
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
ScalarIO(T value)
Constructs from a value.
Definition: core_io.h:62
+
ScalarIO()
Default ctor.
Definition: core_io.h:59
+
std::ostream & operator<<(std::ostream &out, Coord< Rank > const &coord)
Definition: core_io.h:42
+
Helper to enable formatted printing of CUTLASS scalar types to an ostream.
Definition: core_io.h:53
+
Definition: vector.h:62
+
T value
Value to print.
Definition: core_io.h:56
+
Defines a 1D vector of elements held in the registers of each thread.
diff --git a/docs/cutlass_8h.html b/docs/cutlass_8h.html index bbb0463c9..419c9123f 100644 --- a/docs/cutlass_8h.html +++ b/docs/cutlass_8h.html @@ -73,8 +73,10 @@ $(function() {
cutlass.h File Reference
@@ -85,6 +87,13 @@ $(function() {

Go to the source code of this file.

+ + + + + +

+Classes

struct  DebugType< T >
 
struct  DebugValue< Value >
 
@@ -96,18 +105,26 @@ Macros - + + + - - + + +

Namespaces

 cutlass
 
#define CUTLASS_MINOR   0
 
#define CUTLASS_PATCH   0
#define CUTLASS_PATCH   1
 
#define CUTLASS_VERSION   ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
 
#define CUTLASS_HOST_DEVICE
 
#define CUTLASS_ASSERT(x)   assert(x)
 
#define CUTLASS_PRAGMA_UNROLL
 
#define CUTLASS_PRAGMA_NO_UNROLL
 
#define CUTLASS_ASSERT(x)   assert(x)
 
#define CUTLASS_GEMM_LOOP   CUTLASS_PRAGMA_NO_UNROLL
 
+ + + +

+Functions

template<typename T >
void DebugTypeFunc (T const &t)
 

Macro Definition Documentation

@@ -126,6 +143,20 @@ Macros
+
+
+ +

◆ CUTLASS_GEMM_LOOP

+ +
+
+ + + + +
#define CUTLASS_GEMM_LOOP   CUTLASS_PRAGMA_NO_UNROLL
+
+
@@ -177,7 +208,7 @@ Macros
- +
#define CUTLASS_PATCH   0#define CUTLASS_PATCH   1
@@ -224,12 +255,33 @@ Macros
+
+
+

Function Documentation

+ +

◆ DebugTypeFunc()

+ +
+
+
+template<typename T >
+ + + + + + + + +
void DebugTypeFunc (T const & t)
+
+
diff --git a/docs/cutlass_8h_source.html b/docs/cutlass_8h_source.html index d2f442295..9c9fb2b29 100644 --- a/docs/cutlass_8h_source.html +++ b/docs/cutlass_8h_source.html @@ -76,11 +76,14 @@ $(function() {
cutlass.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
30 #pragma once
31 
33 
34 #define CUTLASS_MAJOR 1
35 #define CUTLASS_MINOR 0
36 #define CUTLASS_PATCH 0
37 #define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
38 
39 #ifdef __NVCC__
40 #define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__
41 #define CUTLASS_DEVICE __forceinline__ __device__
42 #elif defined(__CUDACC_RTC__)
43 #define CUTLASS_HOST_DEVICE __forceinline__ __device__
44 #define CUTLASS_DEVICE __forceinline__ __device__
45 #else
46 #define CUTLASS_HOST_DEVICE
47 // CUTLASS_DEVICE is an error if not compiling device code
48 #endif
49 
50 // CUTLASS_PRAGMA_UNROLL inserts a CUTLASS_PRAGMA_UNROLL if supported by the compiler
51 #if defined(__CUDA_ARCH__)
52 #if defined(_MSC_VER)
53 #define CUTLASS_PRAGMA_UNROLL __pragma("unroll")
54 #define CUTLASS_PRAGMA_NO_UNROLL __pragma("unroll 1")
55 #else
56 #define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
57 #define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
58 #endif
59 #else
60 #define CUTLASS_PRAGMA_UNROLL
61 #define CUTLASS_PRAGMA_NO_UNROLL
62 #endif
63 
64 #define CUTLASS_ASSERT(x) assert(x)
65 
66 namespace cutlass {
67 
69 static const int kWarpSize = 32;
70 
71 } // namespace cutlass
72 
Definition: convert.h:33
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
30 #pragma once
31 
33 
34 #define CUTLASS_MAJOR 1
35 #define CUTLASS_MINOR 0
36 #define CUTLASS_PATCH 1
37 #define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
38 
39 #ifdef __NVCC__
40 #define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__
41 #define CUTLASS_DEVICE __forceinline__ __device__
42 #elif defined(__CUDACC_RTC__)
43 #define CUTLASS_HOST_DEVICE __forceinline__ __device__
44 #define CUTLASS_DEVICE __forceinline__ __device__
45 #else
46 #define CUTLASS_HOST_DEVICE
47 // CUTLASS_DEVICE is an error if not compiling device code
48 #endif
49 
50 #define CUTLASS_ASSERT(x) assert(x)
51 
52 // CUTLASS_PRAGMA_(UNROLL|NO_UNROLL) optimization directives for the CUDA compiler.
53 #if defined(__CUDA_ARCH__)
54 #if defined(_MSC_VER)
55 #define CUTLASS_PRAGMA_UNROLL __pragma("unroll")
56 #define CUTLASS_PRAGMA_NO_UNROLL __pragma("unroll 1")
57 #else
58 #define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
59 #define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
60 #endif
61 #else
62 #define CUTLASS_PRAGMA_UNROLL
63 #define CUTLASS_PRAGMA_NO_UNROLL
64 #endif
65 
66 #define CUTLASS_GEMM_LOOP CUTLASS_PRAGMA_NO_UNROLL
67 
68 // A small helper class to dump a type at compile time
69 // Usage:: DumpType<Class>::Class
70 template <typename T>
71 struct DebugType {};
72 
73 template <typename T>
74 void DebugTypeFunc(T const& t) {
75  T::t;
76 }
77 
78 // A small helper class to dump a compile time constant at compile time
79 // Usage: DumpValue<Class::kConstant>::kConstant
80 template <int Value>
81 struct DebugValue {};
82 
83 namespace cutlass {
84 
86 static const int kWarpSize = 32;
87 
88 } // namespace cutlass
89 
Definition: convert.h:33
+
Definition: cutlass.h:81
+
Definition: cutlass.h:71
+
void DebugTypeFunc(T const &t)
Definition: cutlass.h:74
diff --git a/docs/cutlass__math_8h.html b/docs/cutlass__math_8h.html index 953b0d4c7..c4dbc54b0 100644 --- a/docs/cutlass__math_8h.html +++ b/docs/cutlass__math_8h.html @@ -83,7 +83,7 @@ $(function() {

Math utilities. More...

-
#include <cutlass/util/platform.h>
+

Go to the source code of this file.

@@ -103,6 +103,10 @@ Classes + + + +
 
struct  cutlass::divide_assert< Dividend, Divisor >
 
struct  cutlass::Min< A, B >
 
struct  cutlass::Max< A, B >
 
@@ -120,11 +124,17 @@ Functions + + + + + +

Namespaces

template<typename value_t >
CUTLASS_HOST_DEVICE value_t cutlass::lcm (value_t a, value_t b)
 
template<typename value_t >
CUTLASS_HOST_DEVICE value_t cutlass::clz (value_t x)
 
template<typename value_t >
CUTLASS_HOST_DEVICE value_t cutlass::find_log2 (value_t x)
 
diff --git a/docs/cutlass__math_8h_source.html b/docs/cutlass__math_8h_source.html index 2809a8456..8381f641a 100644 --- a/docs/cutlass__math_8h_source.html +++ b/docs/cutlass__math_8h_source.html @@ -76,27 +76,33 @@ $(function() {
cutlass_math.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
26 #pragma once
27 
33 #include <cutlass/util/platform.h>
34 
35 namespace cutlass {
36 
37 /******************************************************************************
38  * Static math utilities
39  ******************************************************************************/
40 
44 template <int N>
45 struct is_pow2 : platform::integral_constant<bool, (N & (N - 1)) == 0> {};
46 
50 template <int N, int CurrentVal = N, int Count = 0>
51 struct log2_down {
53  enum { value = log2_down<N, (CurrentVal >> 1), Count + 1>::value };
54 };
55 
56 // Base case
57 template <int N, int Count>
58 struct log2_down<N, 1, Count> {
59  enum { value = Count };
60 };
61 
65 template <int N, int CurrentVal = N, int Count = 0>
66 struct log2_up {
68  enum { value = log2_up<N, (CurrentVal >> 1), Count + 1>::value };
69 };
70 
71 // Base case
72 template <int N, int Count>
73 struct log2_up<N, 1, Count> {
74  enum { value = ((1 << Count) < N) ? Count + 1 : Count };
75 };
76 
80 template <int N>
81 struct sqrt_est {
82  enum { value = 1 << (log2_up<N>::value / 2) };
83 };
84 
89 template <int Dividend, int Divisor>
90 struct divide_assert {
91  enum { value = Dividend / Divisor };
92 
93  static_assert((Dividend % Divisor == 0), "Not an even multiple");
94 };
95 
96 /******************************************************************************
97  * Rounding
98  ******************************************************************************/
99 
103 template <typename dividend_t, typename divisor_t>
104 CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor) {
105  return ((dividend + divisor - 1) / divisor) * divisor;
106 }
107 
111 template <typename value_t>
112 CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b) {
113  for (;;) {
114  if (a == 0) return b;
115  b %= a;
116  if (b == 0) return a;
117  a %= b;
118  }
119 }
120 
124 template <typename value_t>
125 CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b) {
126  value_t temp = gcd(a, b);
127 
128  return temp ? (a / temp * b) : 0;
129 }
130 
131 } // namespace cutlass
Definition: cutlass_math.h:91
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
26 #pragma once
27 
33 #include "cutlass/util/platform.h"
34 
35 namespace cutlass {
36 
37 /******************************************************************************
38  * Static math utilities
39  ******************************************************************************/
40 
44 template <int N>
45 struct is_pow2 : platform::integral_constant<bool, (N & (N - 1)) == 0> {};
46 
50 template <int N, int CurrentVal = N, int Count = 0>
51 struct log2_down {
53  enum { value = log2_down<N, (CurrentVal >> 1), Count + 1>::value };
54 };
55 
56 // Base case
57 template <int N, int Count>
58 struct log2_down<N, 1, Count> {
59  enum { value = Count };
60 };
61 
65 template <int N, int CurrentVal = N, int Count = 0>
66 struct log2_up {
68  enum { value = log2_up<N, (CurrentVal >> 1), Count + 1>::value };
69 };
70 
71 // Base case
72 template <int N, int Count>
73 struct log2_up<N, 1, Count> {
74  enum { value = ((1 << Count) < N) ? Count + 1 : Count };
75 };
76 
80 template <int N>
81 struct sqrt_est {
82  enum { value = 1 << (log2_up<N>::value / 2) };
83 };
84 
89 template <int Dividend, int Divisor>
90 struct divide_assert {
91  enum { value = Dividend / Divisor };
92 
93  static_assert((Dividend % Divisor == 0), "Not an even multiple");
94 };
95 
96 /******************************************************************************
97  * Rounding
98  ******************************************************************************/
99 
103 template <typename dividend_t, typename divisor_t>
104 CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor) {
105  return ((dividend + divisor - 1) / divisor) * divisor;
106 }
107 
111 template <typename value_t>
112 CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b) {
113  for (;;) {
114  if (a == 0) return b;
115  b %= a;
116  if (b == 0) return a;
117  a %= b;
118  }
119 }
120 
124 template <typename value_t>
125 CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b) {
126  value_t temp = gcd(a, b);
127 
128  return temp ? (a / temp * b) : 0;
129 }
130 
136 template <typename value_t>
137 CUTLASS_HOST_DEVICE value_t clz(value_t x) {
138  for (int i = 31; i >= 0; --i) {
139  if ((1 << i) & x) return 31 - i;
140  }
141  return 32;
142 }
143 
144 template <typename value_t>
145 CUTLASS_HOST_DEVICE value_t find_log2(value_t x) {
146  int a = 31 - clz(x);
147  a += (x & (x - 1)) != 0; // Round up, add 1 if not a power of 2.
148  return a;
149 }
150 
151 /******************************************************************************
152  * Min/Max
153  ******************************************************************************/
154 
155 template <int A, int B>
156 struct Min {
157  static int const kValue = (A < B) ? A : B;
158 };
159 
160 template <int A, int B>
161 struct Max {
162  static int const kValue = (A > B) ? A : B;
163 };
164 
165 } // namespace cutlass
Definition: cutlass_math.h:91
Definition: convert.h:33
+
static int const kValue
Definition: cutlass_math.h:157
+
CUTLASS_HOST_DEVICE value_t find_log2(value_t x)
Definition: cutlass_math.h:145
Definition: cutlass_math.h:51
C++ features that may be otherwise unimplemented for CUDA device functions.
+
Definition: cutlass_math.h:156
Definition: cutlass_math.h:53
CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b)
Definition: cutlass_math.h:125
CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor)
Definition: cutlass_math.h:104
Definition: cutlass_math.h:68
-
std::integral_constant
Definition: platform.h:274
+
std::integral_constant
Definition: platform.h:282
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
-
#define static_assert(__e, __m)
Definition: platform.h:145
+
#define static_assert(__e, __m)
Definition: platform.h:153
+
Definition: cutlass_math.h:161
Definition: cutlass_math.h:82
CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b)
Definition: cutlass_math.h:112
Definition: cutlass_math.h:90
Definition: cutlass_math.h:66
+
CUTLASS_HOST_DEVICE value_t clz(value_t x)
Definition: cutlass_math.h:137
Definition: cutlass_math.h:45
+
static int const kValue
Definition: cutlass_math.h:162
Definition: cutlass_math.h:81
diff --git a/docs/debug_8h.html b/docs/debug_8h.html index 1f88396ab..81ed9f3ca 100644 --- a/docs/debug_8h.html +++ b/docs/debug_8h.html @@ -231,7 +231,7 @@ Functions
- + - +

Classes

struct  cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >
struct  cutlass::gemm::DgemmConfig< OutputTile_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_ >
 
struct  cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >
struct  cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >
 
diff --git a/docs/dgemm__traits_8h_source.html b/docs/dgemm__traits_8h_source.html index 9cf2c8738..d7cdbe529 100644 --- a/docs/dgemm__traits_8h_source.html +++ b/docs/dgemm__traits_8h_source.html @@ -76,26 +76,26 @@ $(function() {
dgemm_traits.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/gemm/gemm.h>
37 
38 namespace cutlass {
39 namespace gemm {
40 
42 
43 template <
45  typename OutputTile_,
47  typename AccumulatorsPerThread_,
49  int kScalarsPerLdgA_ = 1,
51  int kScalarsPerLdgB_ = 1>
53  : public GemmConfig<
55  double,
57  double,
59  double,
61  double,
63  OutputTile_,
65  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, double, double, double>,
67  kScalarsPerLdgA_,
69  kScalarsPerLdgA_,
71  2,
73  kScalarsPerLdgB_,
75  kScalarsPerLdgB_,
77  2,
79  1,
81  2,
83  1,
85  2> {};
86 
88 
89 template <
91  MatrixLayout::Kind kLayoutA_,
93  MatrixLayout::Kind kLayoutB_,
95  typename OutputTile_ = Shape<8, 64, 128>,
97  typename EpilogueFunctor_ = LinearScaling<double>,
99  typename AccumulatorsPerThread_ = Shape<8, 8, 8>,
101  int kScalarsPerLdgA_ = 1,
103  int kScalarsPerLdgB_ = 1,
105  typename Index_ = int,
107  typename GemmConfig_ =
110  typename GemmEpilogueTraits_ =
113  // The layout for A.
114  kLayoutA_,
115  // The layout for B.
116  kLayoutB_,
117  // The config.
118  GemmConfig_,
119  // The epilogue.
120  GemmEpilogue<GemmEpilogueTraits_>,
121  // The index.
122  Index_> {};
123 
125 
126 } // namespace gemm
127 } // namespace cutlass
Definition: convert.h:33
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include "cutlass/gemm/gemm.h"
37 
38 namespace cutlass {
39 namespace gemm {
40 
42 
43 template <
45  typename OutputTile_,
47  typename ThreadGemmShape_,
49  int kScalarsPerLdgA_ = 1,
51  int kScalarsPerLdgB_ = 1>
53  : public GemmConfig<
55  double,
57  double,
59  double,
61  double,
63  OutputTile_,
65  ThreadMultiplyAdd<ThreadGemmShape_, Shape<1, 4, 8>, double, double, double>,
67  kScalarsPerLdgA_,
69  kScalarsPerLdgA_,
71  2,
73  kScalarsPerLdgB_,
75  kScalarsPerLdgB_,
77  2,
79  1,
81  2,
83  1,
85  2,
87  false,
89  false,
91  false
92  >{};
93 
95 
96 template <
98  MatrixLayout::Kind kLayoutA_,
100  MatrixLayout::Kind kLayoutB_,
102  typename OutputTile_ = Shape<8, 64, 128>,
104  typename EpilogueFunctor_ = LinearScaling<double>,
106  typename ThreadGemmShape_ = Shape<8, 8, 8>,
108  int kScalarsPerLdgA_ = 1,
110  int kScalarsPerLdgB_ = 1,
112  typename Index_ = int,
114  typename GemmConfig_ =
117  typename GemmEpilogueTraits_ =
120  // The layout for A.
121  kLayoutA_,
122  // The layout for B.
123  kLayoutB_,
124  // The config.
125  GemmConfig_,
126  // The epilogue.
127  GemmEpilogue<GemmEpilogueTraits_>,
128  // The index.
129  Index_> {};
130 
132 
133 } // namespace gemm
134 } // namespace cutlass
Definition: convert.h:33
Defines iterators for efficiently loading and storing to global memory.
Defines structural properties of complete GEMM computation.
Template implementing matrix multiply-add operations on fragments.
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
Defines iterators for efficiently loading and storing tiles to and from shared memory.
-
Definition: gemm_traits.h:79
-
Definition: dgemm_traits.h:112
+
Definition: gemm_config.h:76
+
Definition: dgemm_traits.h:119
Definition: dgemm_traits.h:52
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
-
Definition: gemm_epilogue_traits.h:300
-
Kind
Definition: matrix_traits.h:36
-
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Definition: gemm_epilogue_traits.h:323
+
Kind
Enumeration defining fundamental contiguous layouts.
Definition: matrix_traits.h:159
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:51
Implements a software-pipelined efficient GEMM.
Defines structural properties of the GEMM epilogue.
-
Definition: gemm_traits.h:723
+
Definition: gemm_traits.h:650
diff --git a/docs/dir_1417ee5ebebc309c36b7962f26a92c39.html b/docs/dir_1417ee5ebebc309c36b7962f26a92c39.html index d7393ef13..6555e36cf 100644 --- a/docs/dir_1417ee5ebebc309c36b7962f26a92c39.html +++ b/docs/dir_1417ee5ebebc309c36b7962f26a92c39.html @@ -101,15 +101,15 @@ Files
- - - + + + @@ -128,12 +128,24 @@ Files + + + + + + + + + + + + @@ -143,11 +155,20 @@ Files + + + + + + + + +

@@ -109,7 +109,7 @@ Namespaces

file  fragment.h [code]
 Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers.
 
file  fragment_load_store.h [code]
 Defines accessors for loading and storing fragments to memory efficiently.
 
file  fragment_multiply_add.h [code]
 Defines multiply-add operations on fragments within a thread.
 
file  iterator_access.h [code]
 Free functions for loading and storing to implementations of tile iterator concepts.
 
file  kernel_launch.h [code]
 Defines structures and helpers to launch CUDA kernels within CUTLASS.
 
file  load_store.h [code]
 Defines abstractions for efficiently loading and storing vectors to memory.
 
file  tensor_ref.h [code]
 Defines a structure containing strides, bounds, and a pointer to tensor data.
 
file  tensor_ref_collection.h [code]
 Introduces TensorRefCollection concept and defines TensorRefBatch and TensorRefArray.
 
file  tensor_view.h [code]
 Defines a structure containing strides and a pointer to tensor data.
 
file  tile_allocation.h [code]
 Defines a fragment based on a Shape<> template.
 
file  tile_coord.h [code]
 Defines a coordinate used for the CUTLASS 4-D tile structure.
 
file  tile_iterator.h [code]
 Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
 
file  tile_stream.h [code]
 Implements the tile stream concept, composing an iterator with a transformation. Offers split-phase semantics, separating the initiation of an asynchronous memory operation with a fence forcing it to complete.
 
file  tile_traits_standard.h [code]
 Defines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance.
 
file  wmma_matrix.h [code]
 Abstractions for loading and storing matrices using the CUDA WMMA API.
 
file  zip_fragment.h [code]
 Models a pair of fragments.
 
file  zip_tensor_ref.h [code]
 Defines a structure containing a pair of TensorRef-like objects.
 
file  zip_tile_iterator.h [code]
 Constructs an iterator that owns two tile iterator instances.
 
diff --git a/docs/dir_18d6a367a3982a494d65599933fc67a3.html b/docs/dir_18d6a367a3982a494d65599933fc67a3.html index 161267475..b606ad3e0 100644 --- a/docs/dir_18d6a367a3982a494d65599933fc67a3.html +++ b/docs/dir_18d6a367a3982a494d65599933fc67a3.html @@ -85,9 +85,24 @@ Files
file  dgemm_traits.h [code]
 Defines structural traits of double-precision GEMM.
 
file  fp16_sgemm_multiply_add.h [code]
 Template implementing matrix multiply-add operations on fragments.
 
file  fp16_sgemm_traits.h [code]
 Defines structural properties of single-precision GEMM where any number of the input/output could be fp16 or fp32. The accumulator type stays in fp32.
 
file  gemm.h [code]
 Implements a software-pipelined efficient GEMM.
 
file  gemm_config.h [code]
 Defines properties of GEMM computation that impose some constraints on caller.
 
file  gemm_coord.h [code]
 GemmCoord is a structure derived from Coord<4> that specifies a location within the coordinate system of a GEMM problem.
 
file  gemm_desc.h [code]
 Implements a software-pipelined efficient GEMM.
 
file  gemm_epilogue.h [code]
 Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product.
 
file  gemm_shared_tile.h [code]
 Defines iterators for efficiently loading and storing tiles to and from shared memory.
 
file  gemm_stream_pair.h [code]
 Defines a pair of GEMM tile streams.
 
file  gemm_traits.h [code]
 Defines structural properties of complete GEMM computation.
 
file  hgemm_traits.h [code]
 Defines structural properties of half-precision GEMM computation.
 
file  identity_block_swizzle.h [code]
 Defines functors for mapping blockIdx to partitions of the GEMM computation.
 
file  igemm_epilogue.h [code]
 Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats.
 
file  linear_scaling.h [code]
 Implements the BLAS linear scaling function alpha*AB + beta*C.
 
file  linear_scaling_device_ptr.h [code]
 Implements the BLAS linear scaling function alpha*AB + beta*C.
 
file  scalar_or_pointer.h [code]
 Implements the BLAS linear scaling function alpha*AB + beta*C.
 
file  sgemm_traits.h [code]
 Defines structural properties of single-precision GEMM.
 
file  thread_multiply_add.h [code]
 Template implementing matrix multiply-add operations on fragments.
 
file  threadblock_swizzle.h [code]
 Defines functors for mapping blockIdx to partitions of the GEMM computation.
 
file  wmma_gemm_epilogue_traits.h [code]
 Defines structural properties of WMMA GEMM's epilogue phase.
 
+ + + + @@ -92,7 +96,7 @@ Files diff --git a/docs/files.html b/docs/files.html index 2c06de5a8..101952090 100644 --- a/docs/files.html +++ b/docs/files.html @@ -75,62 +75,79 @@ $(function() {
Here is a list of all files with brief descriptions:

Files

file  complex.h [code]
 
file  cutlass_math.h [code]
 Math utilities.
 
file  debug.h [code]
 Debugging and logging functionality.
 
file  numeric_types.h [code]
 
file  platform.h [code]
 C++ features that may be otherwise unimplemented for CUDA device functions.
 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 clear_accumulators.hDefines abstractions for efficiently clearing accumulator tiles
 convert.hDefines conversion operations among Fragments of different base type
 coord.hA Coord is a coordinate of arbitrary rank into a tensor or matrix
 core_io.hHelpers for printing cutlass/core objects
 cutlass.hBasic include for CUTLASS macros
 cutlass_math.hMath utilities
 debug.hDebugging and logging functionality
 dgemm_traits.hDefines structural traits of double-precision GEMM
 fragment.hDefines Fragment, a statically-sized array for storing parts of matrices within a thread's registers
 fragment_load_store.hDefines accessors for loading and storing fragments to memory efficiently
 fragment_multiply_add.hDefines multiply-add operations on fragments within a thread
 gemm.hImplements a software-pipelined efficient GEMM
 gemm_epilogue.hImplements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product
 gemm_epilogue_traits.hDefines structural properties of the GEMM epilogue
 gemm_global_stream.hImplements efficient loading of the thread block-level tile from global memory and storing to shared memory
 gemm_global_tile.hDefines iterators for efficiently loading and storing to global memory
 gemm_operand.hDefines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory
 gemm_shared_stream.hDefines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline
 gemm_shared_tile.hDefines iterators for efficiently loading and storing tiles to and from shared memory
 gemm_traits.hDefines structural properties of complete GEMM computation
 hgemm_global_tile.hTile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits
 hgemm_multiply_add.hSpecialization implementing multiply-add operation on half-precision floating point fragments
 hgemm_swizzle.hTransposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands
 hgemm_traits.hDefines structural properties of half-precision GEMM computation
 identity_block_swizzle.hDefines functors for mapping blockIdx to partitions of the GEMM computation
 igemm_epilogue.hDefines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats
 igemm_global_tile.hImplements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory
 igemm_multiply_add.hImplements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction
 igemm_swizzle.hTransposes a fragment of data containing packed 8-bit integer elements
 igemm_traits.hDefines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8bit integers, accumulators are assumed to be 32b signed integers, and output formats vary
 iterator_access.hFree functions for loading and storing to implementations of tile iterator concepts
 linear_scaling.hImplements the BLAS linear scaling function alpha*AB + beta*C
 load_store.hDefines abstractions for efficiently loading and storing vectors to memory
 matrix_traits.hDefines properties of matrices used to denote layout and operands to GEMM kernels
 platform.hC++ features that may be otherwise unimplemented for CUDA device functions
 predicate_vector.hDefines container classes and iterators for managing a statically sized vector of boolean predicates
 reshape_tile.hDefines a type for restructuring a tile
 sgemm_traits.hDefines structural properties of single-precision GEMM
 shape.hDefines Shape implementing the Layout concept for representing a 4D hypercube of objects
 tensor_ref.hDefines a structure containing strides, bounds, and a pointer to tensor data
 tensor_view.hDefines a structure containing strides and a pointer to tensor data
 thread_multiply_add.hTemplate implementing matrix multiply-add operations on fragments
 tile_iterator.hDefines the Tile Traits concept and iterators for loading and storing to tiles efficiently
 tile_traits_standard.hDefines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance
 vector.hDefines a 1D vector of elements held in the registers of each thread
 wmma_gemm_epilogue_traits.hDefines structural properties of WMMA GEMM's epilogue phase
 wmma_gemm_global_tile.hDefines tile iterator traits for loading thread block-level tile from global memory
 wmma_gemm_multiply_add.hImplements warp-level matrix multiply-accumulate operation using CUDA WMMA API
 wmma_gemm_shared_tile.hDefines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM
 wmma_gemm_traits.hDefines structural properties of GEMM targeting WMMA API in CUDA
 wmma_matrix.hAbstractions for loading and storing matrices using the CUDA WMMA API
 complex.h
 convert.hDefines conversion operations among Fragments of different base type
 coord.hA Coord is a coordinate of arbitrary rank into a tensor or matrix
 core_io.hHelpers for printing cutlass/core objects
 cutlass.hBasic include for CUTLASS macros
 cutlass_math.hMath utilities
 debug.hDebugging and logging functionality
 dgemm_traits.hDefines structural traits of double-precision GEMM
 fp16_sgemm_multiply_add.hTemplate implementing matrix multiply-add operations on fragments
 fp16_sgemm_traits.hDefines structural properties of single-precision GEMM where any number of the input/output could be fp16 or fp32. The accumulator type stays in fp32
 fragment.hDefines Fragment, a statically-sized array for storing parts of matrices within a thread's registers
 fragment_multiply_add.hDefines multiply-add operations on fragments within a thread
 gemm.hImplements a software-pipelined efficient GEMM
 gemm_config.hDefines properties of GEMM computation that impose some constraints on caller
 gemm_coord.hGemmCoord is a structure derived from Coord<4> that specifies a location within the coordinate system of a GEMM problem
 gemm_desc.hImplements a software-pipelined efficient GEMM
 gemm_epilogue.hImplements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product
 gemm_epilogue_traits.hDefines structural properties of the GEMM epilogue
 gemm_global_stream.hImplements efficient loading of the thread block-level tile from global memory and storing to shared memory
 gemm_global_tile.hDefines iterators for efficiently loading and storing to global memory
 gemm_operand.hDefines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory
 gemm_shared_stream.hDefines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline
 gemm_shared_tile.hDefines iterators for efficiently loading and storing tiles to and from shared memory
 gemm_stream_pair.hDefines a pair of GEMM tile streams
 gemm_traits.hDefines structural properties of complete GEMM computation
 hgemm_global_tile.hTile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits
 hgemm_multiply_add.hSpecialization implementing multiply-add operation on half-precision floating point fragments
 hgemm_swizzle.hTransposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands
 hgemm_traits.hDefines structural properties of half-precision GEMM computation
 igemm_epilogue.hDefines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats
 igemm_global_tile.hImplements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory
 igemm_multiply_add.hImplements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction
 igemm_swizzle.hTransposes a fragment of data containing packed 8-bit integer elements
 igemm_traits.hDefines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8bit integers, accumulators are assumed to be 32b signed integers, and output formats vary
 iterator_access.hFree functions for loading and storing to implementations of tile iterator concepts
 kernel_launch.hDefines structures and helpers to launch CUDA kernels within CUTLASS
 linear_scaling.hImplements the BLAS linear scaling function alpha*AB + beta*C
 linear_scaling_device_ptr.hImplements the BLAS linear scaling function alpha*AB + beta*C
 load_store.hDefines abstractions for efficiently loading and storing vectors to memory
 matrix_traits.hDefines properties of matrices used to denote layout and operands to GEMM kernels
 numeric_types.h
 platform.hC++ features that may be otherwise unimplemented for CUDA device functions
 predicate_vector.hDefines container classes and iterators for managing a statically sized vector of boolean predicates
 reshape_tile.hDefines a type for restructuring a tile
 scalar_or_pointer.hImplements the BLAS linear scaling function alpha*AB + beta*C
 sgemm_traits.hDefines structural properties of single-precision GEMM
 shape.hDefines Shape implementing the Layout concept for representing a 4D hypercube of objects
 tensor_ref.hDefines a structure containing strides, bounds, and a pointer to tensor data
 tensor_ref_collection.hIntroduces TensorRefCollection concept and defines TensorRefBatch and TensorRefArray
 tensor_view.hDefines a structure containing strides and a pointer to tensor data
 thread_multiply_add.hTemplate implementing matrix multiply-add operations on fragments
 threadblock_swizzle.hDefines functors for mapping blockIdx to partitions of the GEMM computation
 tile_allocation.hDefines a fragment based on a Shape<> template
 tile_coord.hDefines a coordinate used for the CUTLASS 4-D tile structure
 tile_iterator.hDefines the Tile Traits concept and iterators for loading and storing to tiles efficiently
 tile_stream.hImplements the tile stream concept, composing an iterator with a transformation. Offers split-phase semantics, separating the initiation of an asynchronous memory operation with a fence forcing it to complete
 tile_traits_standard.hDefines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance
 vector.hDefines a 1D vector of elements held in the registers of each thread
 wmma_gemm_epilogue_traits.hDefines structural properties of WMMA GEMM's epilogue phase
 wmma_gemm_global_tile.hDefines tile iterator traits for loading thread block-level tile from global memory
 wmma_gemm_multiply_add.hImplements warp-level matrix multiply-accumulate operation using CUDA WMMA API
 wmma_gemm_shared_tile.hDefines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM
 wmma_gemm_traits.hDefines structural properties of GEMM targeting WMMA API in CUDA
 wmma_matrix.hAbstractions for loading and storing matrices using the CUDA WMMA API
 zip_fragment.hModels a pair of fragments
 zip_tensor_ref.hDefines a structure containing a pair of TensorRef-like objects
 zip_tile_iterator.hConstructs an iterator that owns two tile iterator instances
diff --git a/docs/fp16__sgemm__multiply__add_8h.html b/docs/fp16__sgemm__multiply__add_8h.html new file mode 100644 index 000000000..deff050ef --- /dev/null +++ b/docs/fp16__sgemm__multiply__add_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: fp16_sgemm_multiply_add.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fp16_sgemm_multiply_add.h File Reference
+
+
+ +

Template implementing matrix multiply-add operations on fragments. +More...

+ +

Go to the source code of this file.

+ + + + + +

+Classes

struct  cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >
 Template performing matrix multiply-add operation within a thread. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/fp16__sgemm__multiply__add_8h_source.html b/docs/fp16__sgemm__multiply__add_8h_source.html new file mode 100644 index 000000000..efac04637 --- /dev/null +++ b/docs/fp16__sgemm__multiply__add_8h_source.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: fp16_sgemm_multiply_add.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fp16_sgemm_multiply_add.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include "cutlass/fragment.h"
32 namespace cutlass {
33 namespace gemm {
34 
36 
38 template <typename ThreadGemmShape_,
39  typename ThreadsPerWarp_>
40 struct ThreadMultiplyAdd<ThreadGemmShape_, ThreadsPerWarp_, half, half, float> {
44  typedef ThreadGemmShape_ ThreadGemmShape;
48  typedef ThreadsPerWarp_ ThreadsPerWarp;
52  typedef half ScalarA;
56  typedef half ScalarB;
60  typedef float ScalarC;
63 
65  CUTLASS_DEVICE ThreadMultiplyAdd() {}
66 
68  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
69  FragmentB const& b,
70  Accumulators const& c,
71  Accumulators& d) {
72  for (int j = 0; j < AccumulatorsPerThread::kH; ++j) {
73  for (int i = 0; i < AccumulatorsPerThread::kW; ++i) {
74  d[j * AccumulatorsPerThread::kW + i] = static_cast<ScalarC>(a[i]) * static_cast<ScalarC>(b[j]) + c[j * AccumulatorsPerThread::kW + i];
75  }
76  }
77  }
78 };
79 
81 
82 } // namespace gemm
83 } // namespace cutlass
Definition: convert.h:33
+
CUTLASS_DEVICE ThreadMultiplyAdd()
Ctor.
Definition: fp16_sgemm_multiply_add.h:65
+
Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
The fragment for B.
Definition: fp16_sgemm_multiply_add.h:58
+
Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
Definition: shape.h:119
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
ShapeMul< ThreadGemmShape, ThreadsPerWarp >::Shape AccumulatorsPerWarp
The number of accumulators per warp.
Definition: fp16_sgemm_multiply_add.h:50
+
Template implementing matrix multiply-add operations on fragments.
+
ThreadGemmShape_ ThreadGemmShape
The shape of a thread-leveel matrix multiply accumulate.
Definition: fp16_sgemm_multiply_add.h:44
+
CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
Multiply : d = a*b + c.
Definition: fp16_sgemm_multiply_add.h:68
+
half ScalarA
The type for A. specialized to half.
Definition: fp16_sgemm_multiply_add.h:52
+
half ScalarB
The type for B. specialized to half.
Definition: fp16_sgemm_multiply_add.h:56
+
ThreadsPerWarp_ ThreadsPerWarp
The number of threads per warp.
Definition: fp16_sgemm_multiply_add.h:48
+
Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
The fragment for A.
Definition: fp16_sgemm_multiply_add.h:54
+
float ScalarC
The type for C and D. specialized to float.
Definition: fp16_sgemm_multiply_add.h:60
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW, 16 > Accumulators
The accumulators.
Definition: fp16_sgemm_multiply_add.h:62
+
ThreadGemmShape AccumulatorsPerThread
Aliased to "AccumulatorsPerThread" for compatibility. Expect to be renamed in CUTLASS v2...
Definition: fp16_sgemm_multiply_add.h:46
+
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:44
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fp16_sgemm_multiply_add.h:42
+
+ + + + diff --git a/docs/fp16__sgemm__traits_8h.html b/docs/fp16__sgemm__traits_8h.html new file mode 100644 index 000000000..0691fbbfc --- /dev/null +++ b/docs/fp16__sgemm__traits_8h.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: fp16_sgemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fp16_sgemm_traits.h File Reference
+
+ + + + + diff --git a/docs/fp16__sgemm__traits_8h_source.html b/docs/fp16__sgemm__traits_8h_source.html new file mode 100644 index 000000000..b5f94457f --- /dev/null +++ b/docs/fp16__sgemm__traits_8h_source.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: fp16_sgemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fp16_sgemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include "cutlass/gemm/gemm.h"
38 
39 namespace cutlass {
40 namespace gemm {
41 
43 
44 template <
46  typename OutputTile_,
48  typename ThreadGemmShape_,
50  typename ScalarA_,
52  typename ScalarB_,
54  typename ScalarC_,
56  typename ScalarD_,
58  int kScalarsPerLdgA_ = 1,
60  int kScalarsPerLdgB_ = 1>
61 struct Fp16SgemmConfig : public GemmConfig<
63  ScalarA_,
65  ScalarB_,
67  ScalarC_,
69  ScalarD_,
71  OutputTile_,
73  ThreadMultiplyAdd<ThreadGemmShape_, Shape<1, 4, 8>, ScalarA_, ScalarB_, float /*for sgemm accum is float*/>,
75  kScalarsPerLdgA_,
77  kScalarsPerLdgA_,
79  4,
81  kScalarsPerLdgB_,
83  kScalarsPerLdgB_,
85  4,
87  1,
89  4,
91  1,
93  2> {};
94 
96 
97 template <
99  MatrixLayout::Kind kLayoutA_,
101  MatrixLayout::Kind kLayoutB_,
103  typename OutputTile_ = Shape<8, 128, 128>,
105  typename ScalarA_ = half,
107  typename ScalarB_ = half,
109  typename ScalarC_ = half,
111  typename ScalarD_ = half,
113  typename Scalar_ = half,
115  typename EpilogueFunctor_ = LinearScaling<Scalar_, FragmentMultiplyAdd<Scalar_, float/*accumulator type*/> >,
117  typename ThreadGemmShape_ = Shape<8, 8, 8>,
119  int kScalarsPerLdgA_ = 1,
121  int kScalarsPerLdgB_ = 1,
123  typename Index_ = int,
125  typename GemmConfig_ =
126  Fp16SgemmConfig<OutputTile_,
127  ThreadGemmShape_,
128  ScalarA_,
129  ScalarB_,
130  ScalarC_,
131  ScalarD_,
132  kScalarsPerLdgA_,
133  kScalarsPerLdgB_>,
135  typename GemmEpilogueTraits_ =
138  // The layout for A.
139  kLayoutA_,
140  // The layout for B.
141  kLayoutB_,
142  // The config.
143  GemmConfig_,
144  // The epilogue.
145  GemmEpilogue<GemmEpilogueTraits_>,
146  // The index.
147  Index_> {};
148 
150 
151 } // namespace gemm
152 } // namespace cutlass
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Defines structural properties of complete GEMM computation.
+
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: gemm_config.h:76
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_epilogue_traits.h:323
+
Definition: fp16_sgemm_traits.h:61
+
Kind
Enumeration defining fundamental contiguous layouts.
Definition: matrix_traits.h:159
+
Template implementing matrix multiply-add operations on fragments.
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:51
+
Implements a software-pipelined efficient GEMM.
+
Defines structural properties of the GEMM epilogue.
+
Definition: fp16_sgemm_traits.h:137
+
Definition: gemm_traits.h:650
+
Definition: fragment_multiply_add.h:41
+
+ + + + diff --git a/docs/fragment_8h.html b/docs/fragment_8h.html index d97ac7b5a..687dfdc86 100644 --- a/docs/fragment_8h.html +++ b/docs/fragment_8h.html @@ -83,15 +83,15 @@ $(function() {

Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers. More...

#include <assert.h>
-#include <cutlass/shape.h>
-#include <cutlass/util/cutlass_math.h>
-#include <cutlass/vector.h>
+#include "cutlass/shape.h"
+#include "cutlass/util/cutlass_math.h"
+#include "cutlass/vector.h"

Go to the source code of this file.

- + @@ -116,7 +116,7 @@ Namespaces diff --git a/docs/fragment_8h_source.html b/docs/fragment_8h_source.html index 8006bbbdf..f7d236565 100644 --- a/docs/fragment_8h_source.html +++ b/docs/fragment_8h_source.html @@ -76,64 +76,66 @@ $(function() {
fragment.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <assert.h>
32 #include <cutlass/shape.h>
34 #include <cutlass/vector.h>
35 
36 namespace cutlass {
37 
39 
56 
73 
75 template <int kAlignment_>
76 struct StorageType {
77  typedef uint64_t Type;
78 };
79 template <>
80 struct StorageType<4> {
81  typedef uint32_t Type;
82 };
83 template <>
84 struct StorageType<2> {
85  typedef uint16_t Type;
86 };
87 template <>
88 struct StorageType<1> {
89  typedef uint8_t Type;
90 };
91 
93 
98 template <typename Element_, int kElements_, size_t kAlignment_ = 16>
99 struct Fragment : public AlignedStruct<kAlignment_> {
101  static_assert(kAlignment_ == 16 || kAlignment_ >= sizeof(Element_), "Alignment is too small");
103  static_assert(is_pow2<kAlignment_>::value, "Alignment must be a power of two");
104 
108  typedef Element_ Element;
110  static int const kElements = kElements_;
111 
113  CUTLASS_DEVICE void clear() {
114  // Avoid element-wise access for sub 32b element type
115  if (kAlignment_ >= 8 && (kElements * sizeof(Element)) % 8 == 0) {
116  uint64_t* ptr = reinterpret_cast<uint64_t*>(storage);
117  for (int i = 0; i < (kElements * sizeof(Element)) / 8; ++i) {
118  ptr[i] = uint64_t(0);
119  }
120  } else if (kAlignment_ >= 4 && (kElements * sizeof(Element)) % 4 == 0) {
121  uint32_t* ptr = reinterpret_cast<uint32_t*>(storage);
122  for (int i = 0; i < (kElements * sizeof(Element)) / 4; ++i) {
123  ptr[i] = uint32_t(0);
124  }
125  } else if (kAlignment_ >= 2 && (kElements * sizeof(Element)) % 2 == 0) {
126  uint16_t* ptr = reinterpret_cast<uint16_t*>(storage);
127  for (int i = 0; i < (kElements * sizeof(Element)) / 2; ++i) {
128  ptr[i] = uint16_t(0);
129  }
130  } else {
131  for (int i = 0; i < kElements; ++i) {
132  storage[i] = 0;
133  }
134  }
135  }
136 
138  CUTLASS_DEVICE Element& operator[](int i) {
139  assert(i < kElements_);
140  return reinterpret_cast<Element*>(storage)[i];
141  }
142 
144  CUTLASS_DEVICE Element const& operator[](int i) const {
145  assert(i < kElements_);
146  return reinterpret_cast<Element const*>(storage)[i];
147  }
148 
149  private:
152 
154  static int const kStorageCount =
155  (sizeof(Element_) * kElements_ + sizeof(StorageType) - 1) / sizeof(StorageType);
157  StorageType storage[kStorageCount];
158 
160  static_assert(sizeof(StorageType) <= kAlignment_, "StorageType is too big for given alignment");
161 };
162 
164 
169 template <typename Fragment_, typename Iterations_, typename AccessType_>
174  typedef Fragment_ Fragment;
176  typedef Iterations_ Iterations;
178  typedef AccessType_ AccessType;
179 
181  typedef typename Fragment::Element Element;
183  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
188 
190  template <typename OtherFragment_>
191  CUTLASS_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0)
192  : pointer(reinterpret_cast<Element*>(&fragment[offset])) {
193  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
194  }
195 
197  CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
198  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
199  return reinterpret_cast<AccessType const&>(pointer[imm]);
200  }
201 
203  CUTLASS_DEVICE AccessType& at(int d, int h, int w, int c = 0) {
204  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
205  return reinterpret_cast<AccessType&>(pointer[imm]);
206  }
207 
209  CUTLASS_DEVICE AccessType const& operator[](int i) const {
210  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
211  }
212 
214  CUTLASS_DEVICE AccessType& operator[](int i) {
215  return reinterpret_cast<AccessType&>(pointer[i * kElementsPerAccess]);
216  }
217 
219  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
220 
223 };
224 
226 
227 template <typename Fragment_, typename Iterations_, typename AccessType_>
232  typedef Fragment_ Fragment;
234  typedef Iterations_ Iterations;
236  typedef AccessType_ AccessType;
237 
239  typedef typename Fragment::Element Element;
241  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
246 
248  template <typename OtherFragment_>
249  CUTLASS_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0)
250  : pointer(reinterpret_cast<Element const*>(&fragment[offset])) {
251  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
252  }
254  CUTLASS_DEVICE FragmentConstIterator(
256  : pointer(reinterpret_cast<Element const*>(rhs_.offset)) {}
257 
259  CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
260  int const imm = ComputeOffsetFromStrides<IterationsStrides>::get(d, h, w, c);
261  return reinterpret_cast<AccessType const&>(pointer[imm]);
262  }
263 
265  CUTLASS_DEVICE AccessType const& operator[](int i) const {
266  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
267  }
268 
270  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
271 
273  Element const* pointer;
274 };
275 
277 
278 } // namespace cutlass
CUTLASS_DEVICE void clear()
Clear a fragment.
Definition: fragment.h:113
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <assert.h>
32 #include "cutlass/shape.h"
34 #include "cutlass/vector.h"
35 
36 namespace cutlass {
37 
39 
56 
73 
75 template <int alignment>
76 struct StorageType {
77  typedef uint64_t Type;
78 };
79 template <>
80 struct StorageType<4> {
81  typedef uint32_t Type;
82 };
83 template <>
84 struct StorageType<2> {
85  typedef uint16_t Type;
86 };
87 template <>
88 struct StorageType<1> {
89  typedef uint8_t Type;
90 };
91 
93 
98 template <typename Element_, int kElements_, size_t kAlignment_ = 16>
99 struct Fragment : public AlignedStruct<kAlignment_> {
101  static_assert(kAlignment_ == 16 || kAlignment_ >= sizeof(Element_), "Alignment is too small");
103  static_assert(is_pow2<kAlignment_>::value, "Alignment must be a power of two");
104 
108  typedef Element_ Element;
110  static int const kElements = kElements_;
112  static int const kAlignment = kAlignment_;
113 
116  // Avoid element-wise access for sub 32b element type
117  if (kAlignment_ >= 8 && (kElements * sizeof(Element)) % 8 == 0) {
118  uint64_t* ptr = reinterpret_cast<uint64_t*>(storage);
119  for (int i = 0; i < (kElements * sizeof(Element)) / 8; ++i) {
120  ptr[i] = uint64_t(0);
121  }
122  } else if (kAlignment_ >= 4 && (kElements * sizeof(Element)) % 4 == 0) {
123  uint32_t* ptr = reinterpret_cast<uint32_t*>(storage);
124  for (int i = 0; i < (kElements * sizeof(Element)) / 4; ++i) {
125  ptr[i] = uint32_t(0);
126  }
127  } else if (kAlignment_ >= 2 && (kElements * sizeof(Element)) % 2 == 0) {
128  uint16_t* ptr = reinterpret_cast<uint16_t*>(storage);
129  for (int i = 0; i < (kElements * sizeof(Element)) / 2; ++i) {
130  ptr[i] = uint16_t(0);
131  }
132  } else {
133  for (int i = 0; i < kElements; ++i) {
134  storage[i] = 0;
135  }
136  }
137  }
138 
140  CUTLASS_HOST_DEVICE Element& operator[](int i) { return reinterpret_cast<Element*>(storage)[i]; }
141 
143  CUTLASS_HOST_DEVICE Element const& operator[](int i) const {
144  return reinterpret_cast<Element const*>(storage)[i];
145  }
146 
147  private:
150 
152  static int const kStorageCount =
153  (sizeof(Element_) * kElements_ + sizeof(StorageType) - 1) / sizeof(StorageType);
155  StorageType storage[kStorageCount];
156 
158  static_assert(sizeof(StorageType) <= kAlignment_, "StorageType is too big for given alignment");
159 };
160 
162 
167 template <typename Fragment_, typename Iterations_, typename AccessType_>
172  typedef Fragment_ Fragment;
174  typedef Iterations_ Iterations;
176  typedef AccessType_ AccessType;
177 
179  typedef typename Fragment::Element Element;
181  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
186 
188  template <typename OtherFragment_>
189  CUTLASS_HOST_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0)
190  : pointer(reinterpret_cast<Element*>(&fragment[offset])) {
191  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
192  }
193 
195  CUTLASS_HOST_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
196  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
197  return reinterpret_cast<AccessType const&>(pointer[imm]);
198  }
199 
201  CUTLASS_HOST_DEVICE AccessType& at(int d, int h, int w, int c = 0) {
202  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
203  return reinterpret_cast<AccessType&>(pointer[imm]);
204  }
205 
208  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
209  }
210 
213  return reinterpret_cast<AccessType&>(pointer[i * kElementsPerAccess]);
214  }
215 
217  CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
218 
221 };
222 
224 
225 template <typename Fragment_, typename Iterations_, typename AccessType_>
230  typedef Fragment_ Fragment;
232  typedef Iterations_ Iterations;
234  typedef AccessType_ AccessType;
235 
237  typedef typename Fragment::Element Element;
239  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
244 
246  template <typename OtherFragment_>
247  CUTLASS_HOST_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0)
248  : pointer(reinterpret_cast<Element const*>(&fragment[offset])) {
249  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
250  }
254  : pointer(reinterpret_cast<Element const*>(rhs_.offset)) {}
255 
257  CUTLASS_HOST_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
258  int const imm = ComputeOffsetFromStrides<IterationsStrides>::get(d, h, w, c);
259  return reinterpret_cast<AccessType const&>(pointer[imm]);
260  }
261 
264  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
265  }
266 
268  CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
269 
271  Element const* pointer;
272 };
273 
275 
276 } // namespace cutlass
CUTLASS_HOST_DEVICE void clear()
Clear a fragment.
Definition: fragment.h:115
+
CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:217
Definition: convert.h:33
-
CUTLASS_DEVICE Element & operator[](int i)
The accessor.
Definition: fragment.h:138
-
CUTLASS_DEVICE AccessType & at(int d, int h, int w, int c=0)
The accessor.
Definition: fragment.h:203
-
Definition: vector.h:41
-
Definition: fragment.h:228
-
CUTLASS_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:265
-
Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
Definition: shape.h:155
+
Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, elementsPerAccess > Shape
Definition: shape.h:170
+
Definition: vector.h:42
+
Definition: fragment.h:226
+
CUTLASS_HOST_DEVICE FragmentIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:189
A template defining Fragment Concept.
Definition: fragment.h:99
-
Fragment::Element Element
The element.
Definition: fragment.h:181
-
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:241
-
Fragment_ Fragment
The fragment.
Definition: fragment.h:174
-
Fragment_ Fragment
The fragment.
Definition: fragment.h:232
-
CUTLASS_DEVICE AccessType & operator[](int i)
The accessor.
Definition: fragment.h:214
-
Fragment::Element Element
The element.
Definition: fragment.h:239
-
ShapeStrides< FragmentShape >::Shape IterationsStrides
The linear strides for iterations.
Definition: fragment.h:245
-
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:270
-
CUTLASS_DEVICE FragmentIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:191
+
Fragment::Element Element
The element.
Definition: fragment.h:179
+
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:239
+
Fragment_ Fragment
The fragment.
Definition: fragment.h:172
+
Fragment_ Fragment
The fragment.
Definition: fragment.h:230
+
Fragment::Element Element
The element.
Definition: fragment.h:237
Fragment< Element_, kElements_ > This_
Make sure the alignment makes sense wrt the size of elements.
Definition: fragment.h:101
-
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:172
-
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the the fragment.
Definition: fragment.h:243
+
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:170
+
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the the fragment.
Definition: fragment.h:241
Math utilities.
Definition: fragment.h:76
uint32_t Type
Definition: fragment.h:81
uint8_t Type
Definition: fragment.h:89
-
static CUTLASS_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:211
-
Element * pointer
The pointer.
Definition: fragment.h:222
-
AccessType_ AccessType
The access type.
Definition: fragment.h:236
+
Element * pointer
The pointer.
Definition: fragment.h:220
+
CUTLASS_HOST_DEVICE Element const & operator[](int i) const
The accessor.
Definition: fragment.h:143
+
AccessType_ AccessType
The access type.
Definition: fragment.h:234
+
ShapeStrides< FragmentShape, kElementsPerAccess >::Shape IterationsStrides
The linear strides for iterations.
Definition: fragment.h:243
Definition: shape.h:118
-
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the the fragment.
Definition: fragment.h:185
-
A template defining Fragment Iterator Concept.
Definition: fragment.h:170
+
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the the fragment.
Definition: fragment.h:183
+
CUTLASS_HOST_DEVICE FragmentConstIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:247
+
A template defining Fragment Iterator Concept.
Definition: fragment.h:168
static int const kElements
The number of elements.
Definition: fragment.h:110
-
CUTLASS_DEVICE Element const & operator[](int i) const
The accessor.
Definition: fragment.h:144
-
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:234
-
#define static_assert(__e, __m)
Definition: platform.h:145
-
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:176
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:232
+
CUTLASS_HOST_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:195
+
#define static_assert(__e, __m)
Definition: platform.h:153
+
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:174
+
CUTLASS_HOST_DEVICE FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)
Create from non-constant FragmentIterator.
Definition: fragment.h:252
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
-
CUTLASS_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:259
Element_ Element
The element.
Definition: fragment.h:108
-
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:230
-
CUTLASS_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:209
+
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:228
+
CUTLASS_HOST_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:263
+
CUTLASS_HOST_DEVICE Element & operator[](int i)
The accessor.
Definition: fragment.h:140
+
CUTLASS_HOST_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:207
uint16_t Type
Definition: fragment.h:85
Defines a 1D vector of elements held in the registers of each thread.
-
CUTLASS_DEVICE FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)
Create from non-constant FragmentIterator.
Definition: fragment.h:254
-
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:183
-
ShapeStrides< FragmentShape >::Shape Strides
The linear strides for iterations.
Definition: fragment.h:187
+
uint64_t Type
Definition: fragment.h:77
+
CUTLASS_HOST_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:268
+
ShapeStrides< FragmentShape, kElementsPerAccess >::Shape Strides
The linear strides for iterations.
Definition: fragment.h:185
+
static CUTLASS_HOST_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:199
+
CUTLASS_HOST_DEVICE AccessType & operator[](int i)
The accessor.
Definition: fragment.h:212
+
CUTLASS_HOST_DEVICE AccessType & at(int d, int h, int w, int c=0)
The accessor.
Definition: fragment.h:201
+
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:181
Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
-
AccessType_ AccessType
The access type.
Definition: fragment.h:178
-
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:219
-
uint64_t Type
Definition: fragment.h:77
+
AccessType_ AccessType
The access type.
Definition: fragment.h:176
+
static int const kAlignment
Alignment.
Definition: fragment.h:112
Definition: cutlass_math.h:45
-
CUTLASS_DEVICE FragmentConstIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:249
-
CUTLASS_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:197
-
Element const * pointer
The pointer.
Definition: fragment.h:273
+
CUTLASS_HOST_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:257
+
Element const * pointer
The pointer.
Definition: fragment.h:271
diff --git a/docs/fragment__multiply__add_8h.html b/docs/fragment__multiply__add_8h.html index 59a94dfdf..107cfee79 100644 --- a/docs/fragment__multiply__add_8h.html +++ b/docs/fragment__multiply__add_8h.html @@ -82,15 +82,15 @@ $(function() {

Defines multiply-add operations on fragments within a thread. More...

-

Classes

struct  cutlass::StorageType< kAlignment_ >
struct  cutlass::StorageType< alignment >
 
struct  cutlass::StorageType< 4 >
 
- + - +

Classes

struct  cutlass::gemm::FragmentMultiplyAdd< Scalar_ >
struct  cutlass::gemm::FragmentMultiplyAdd< ScalarAlphaBeta_, ScalarAccum_, fragMul2 >
 
struct  cutlass::gemm::FragmentMultiplyAdd< half >
struct  cutlass::gemm::FragmentMultiplyAdd< half, half, true >
 
diff --git a/docs/fragment__multiply__add_8h_source.html b/docs/fragment__multiply__add_8h_source.html index 9b453fd94..1d4c4f7f2 100644 --- a/docs/fragment__multiply__add_8h_source.html +++ b/docs/fragment__multiply__add_8h_source.html @@ -76,28 +76,26 @@ $(function() {
fragment_multiply_add.h
-Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/fragment.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_>
42  typedef Scalar_ ScalarA;
44  typedef Scalar_ ScalarB;
46  typedef Scalar_ ScalarC;
47 
49  CUTLASS_DEVICE FragmentMultiplyAdd() {}
50 
52  template <typename Fragment_>
53  CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const& b, Fragment_& d) {
54  for (int j = 0; j < Fragment_::kElements; ++j) {
55  d[j] = a * b[j];
56  }
57  }
58 
60  template <typename Fragment_>
61  CUTLASS_DEVICE void multiply_add(Scalar_ a,
62  Fragment_ const& b,
63  Fragment_ const& c,
64  Fragment_& d) {
65  for (int j = 0; j < Fragment_::kElements; ++j) {
66  d[j] = a * b[j] + c[j];
67  }
68  }
69 };
70 
72 
73 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
74 template <>
75 struct FragmentMultiplyAdd<half> {
79  typedef half ScalarA;
81  typedef half ScalarB;
83  typedef half ScalarC;
84 
86  CUTLASS_DEVICE FragmentMultiplyAdd() {}
87 
89  template <typename Fragment_>
90  CUTLASS_DEVICE void multiply(half a, Fragment_ const& b, Fragment_& d) {
91 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
92  // The input.
93  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
94  // The output.
95  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
96 
97  // Assemble a half2 from a.
98  __half2 const a_half2 = __half2half2(a);
99 
100  for (int i = 0; i < Fragment_::kElements / 2; ++i) {
101  d_half2[i] = __hmul2(a_half2, b_half2[i]);
102  }
103 #endif
104  }
105 
107  template <typename Fragment_>
108  CUTLASS_DEVICE void multiply_add(half a, Fragment_ const& b, Fragment_ const& c, Fragment_& d) {
109 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
110  // The inputs.
111  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
112  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
113  // The output.
114  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
115 
116  // Assemble a half2 from a.
117  __half2 const a_half2 = __half2half2(a);
118 
119  for (int i = 0; i < Fragment_::kElements / 2; ++i) {
120  d_half2[i] = __hfma2(a_half2, b_half2[i], c_half2[i]);
121  }
122 #endif
123  }
124 };
125 
126 #endif
127 
129 
130 } // namespace gemm
131 } // namespace cutlass
Scalar_ ScalarB
The type for B.
Definition: fragment_multiply_add.h:44
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include "cutlass/fragment.h"
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template < typename ScalarAlphaBeta_,
38  typename ScalarAccum_,
39  bool fragMul2 = true /*number of element per fragment is multiple of 2*/
40 >
45  typedef ScalarAlphaBeta_ ScalarAlphaBeta;
47  typedef ScalarAccum_ ScalarAccum;
48 
50  CUTLASS_DEVICE FragmentMultiplyAdd() {}
51 
53  template <typename FragmentB_, typename FragmentCd_>
54  CUTLASS_DEVICE void multiply(ScalarAlphaBeta a, FragmentB_ const& b, FragmentCd_& d) {
55 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
56  int const kReduction = FragmentB_::kElements / FragmentCd_::kElements;
57  for (int j = 0; j < FragmentCd_::kElements; ++j) {
58  d[j] = b[j * kReduction + 0];
59  for (int k = 1; k < kReduction; ++k) {
60  d[j] += b[j * kReduction + k];
61  }
62  d[j] = a * ScalarAlphaBeta(d[j]);
63  }
64 #endif
65  }
66 
68  template <typename FragmentB_, typename FragmentCd_>
69  CUTLASS_DEVICE void multiply_add(ScalarAlphaBeta a,
70  FragmentB_ const& b,
71  FragmentCd_ const& c,
72  FragmentCd_& d) {
73 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
74  int const kReduction = FragmentB_::kElements / FragmentCd_::kElements;
75  for (int j = 0; j < FragmentCd_::kElements; ++j) {
76  d[j] = b[j * kReduction + 0];
77  for (int k = 1; k < kReduction; ++k) {
78  d[j] += b[j * kReduction + k];
79  }
80  d[j] = a * ScalarAlphaBeta(d[j]) + ScalarAlphaBeta(c[j]);
81  }
82 #endif
83  }
84 };
85 
87 
88 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
89 template <>
90 struct FragmentMultiplyAdd<half, half, true> {
94  typedef half ScalarAlphaBeta;
96  typedef half ScalarAccum;
97 
99  CUTLASS_DEVICE FragmentMultiplyAdd() {}
100 
102  template <typename FragmentB_, typename FragmentCd_>
103  CUTLASS_DEVICE void multiply(half a, FragmentB_ const& b, FragmentCd_& d) {
104 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
105  // The input.
106  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
107  // The output.
108  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
109 
110  // Assemble a half2 from a.
111  __half2 const a_half2 = __half2half2(a);
112 
113  int const kReduction = (FragmentB_::kElements / FragmentCd_::kElements);
114 
115  for (int j = 0; j < FragmentCd_::kElements / 2; ++j) {
116  d_half2[j] = __hmul2(a_half2, b_half2[j * kReduction + 0]);
117 
118  for (int k = 1; k < kReduction; ++k) {
119  d_half2[j] = __hfma2(a_half2, b_half2[j * kReduction + k], d_half2[j]);
120  }
121  }
122 #endif
123  }
124 
125 
127  template <typename FragmentB_, typename FragmentCd_>
128  CUTLASS_DEVICE void multiply_add(half a,
129  FragmentB_ const& b,
130  FragmentCd_ const& c,
131  FragmentCd_& d) {
132 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
133  // The inputs.
134  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
135  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
136  // The output.
137  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
138 
139  // Assemble a half2 from a.
140  __half2 const a_half2 = __half2half2(a);
141 
142  int const kReduction = (FragmentB_::kElements / FragmentCd_::kElements);
143  for (int j = 0; j < FragmentCd_::kElements / 2; ++j) {
144  d_half2[j] = __hfma2(a_half2, b_half2[j * kReduction + 0], c_half2[j]);
145 
146  for (int k = 1; k < kReduction; ++k) {
147  d_half2[j] = __hfma2(a_half2, b_half2[j * kReduction + k], d_half2[j]);
148  }
149  }
150 #endif
151  }
152 };
153 
154 #endif
155 
157 
158 } // namespace gemm
159 } // namespace cutlass
CUTLASS_DEVICE void multiply(ScalarAlphaBeta a, FragmentB_ const &b, FragmentCd_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:54
+
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:92
Definition: convert.h:33
-
CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:53
-
half ScalarA
The type for A.
Definition: fragment_multiply_add.h:79
-
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:86
-
CUTLASS_DEVICE void multiply_add(Scalar_ a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply : d = a*b + c.
Definition: fragment_multiply_add.h:61
-
half ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:83
-
CUTLASS_DEVICE void multiply_add(half a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply : d = a*b + c.
Definition: fragment_multiply_add.h:108
+
half ScalarAlphaBeta
The type for alpha and beta.
Definition: fragment_multiply_add.h:94
+
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:50
+
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:99
+
CUTLASS_DEVICE void multiply(half a, FragmentB_ const &b, FragmentCd_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:103
+
ScalarAccum_ ScalarAccum
The type for accumlator.
Definition: fragment_multiply_add.h:47
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
-
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:40
-
Scalar_ ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:46
-
Scalar_ ScalarA
The type for A.
Definition: fragment_multiply_add.h:42
-
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:49
+
ScalarAlphaBeta_ ScalarAlphaBeta
The type for alpha and beta.
Definition: fragment_multiply_add.h:45
+
CUTLASS_DEVICE void multiply_add(half a, FragmentB_ const &b, FragmentCd_ const &c, FragmentCd_ &d)
Multiply : d = a*b + c.
Definition: fragment_multiply_add.h:128
+
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:43
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
-
CUTLASS_DEVICE void multiply(half a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:90
-
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:77
-
half ScalarB
The type for B.
Definition: fragment_multiply_add.h:81
-
Definition: fragment_multiply_add.h:38
+
half ScalarAccum
The type for accumlator.
Definition: fragment_multiply_add.h:96
+
CUTLASS_DEVICE void multiply_add(ScalarAlphaBeta a, FragmentB_ const &b, FragmentCd_ const &c, FragmentCd_ &d)
Multiply : d = a*b + c.
Definition: fragment_multiply_add.h:69
+
Definition: fragment_multiply_add.h:41
diff --git a/docs/functions.html b/docs/functions.html index e6b156fbc..bdde612a8 100644 --- a/docs/functions.html +++ b/docs/functions.html @@ -71,77 +71,101 @@ $(function() {
Here is a list of all class members with links to the classes they belong to:

- a -

diff --git a/docs/functions_0x7e.html b/docs/functions_0x7e.html index 41aa664c4..0cb0e3458 100644 --- a/docs/functions_0x7e.html +++ b/docs/functions_0x7e.html @@ -78,7 +78,7 @@ $(function() { diff --git a/docs/functions_b.html b/docs/functions_b.html index 79038aa18..35c2018ba 100644 --- a/docs/functions_b.html +++ b/docs/functions_b.html @@ -71,42 +71,82 @@ $(function() {
Here is a list of all class members with links to the classes they belong to:

- b -

diff --git a/docs/functions_enum.html b/docs/functions_enum.html index b710de0fe..df8ae39f4 100644 --- a/docs/functions_enum.html +++ b/docs/functions_enum.html @@ -70,18 +70,20 @@ $(function() { diff --git a/docs/functions_eval.html b/docs/functions_eval.html index 40c01ec85..b53129338 100644 --- a/docs/functions_eval.html +++ b/docs/functions_eval.html @@ -77,6 +77,13 @@ $(function() { +

- b -

+ +

- k -

@@ -139,6 +160,13 @@ $(function() { +

- o -

+ +

- v -

diff --git a/docs/functions_func_g.html b/docs/functions_func_g.html index b30237366..b6258df4a 100644 --- a/docs/functions_func_g.html +++ b/docs/functions_func_g.html @@ -74,47 +74,73 @@ $(function() {
  • Gemm() : cutlass::gemm::Gemm< GemmTraits_ >
  • +
  • GemmCoord() +: cutlass::gemm::GemmCoord +
  • +
  • GemmDesc() +: cutlass::gemm::GemmDesc< AType_, BType_, CType_, DType_, SType_, Index_ > +
  • GemmEpilogue() -: cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > +: cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
  • GemmGlobalIteratorAb() -: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > +: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
  • GemmGlobalIteratorCd() -: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > +: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
  • get() -: cutlass::ComputeOffsetFromShape< Shape_ > -, cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > -, cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > -, cutlass::ComputeOffsetFromStrides< Strides_ > -, cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > -, cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > +: cutlass::ComputeOffsetFromShape< Shape_ > +, cutlass::ComputeOffsetFromStrides< Strides_ > , cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ > , cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > , cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > +, cutlass::detail::ScalarOrPointer< Scalar_ > , cutlass::platform::unique_ptr< T, Deleter > , cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator
  • +
  • get_batch_id() +: cutlass::gemm::ColumnMajorBlockSwizzle< groupCols, swDirection > +, cutlass::gemm::IdentityBlockSwizzle +, cutlass::gemm::RowMajorBlockSwizzle< groupRows, swDirection > +
  • get_deleter() -: cutlass::platform::unique_ptr< T, Deleter > +: cutlass::platform::unique_ptr< T, Deleter > +
  • +
  • get_grid_layout() +: cutlass::gemm::ColumnMajorBlockSwizzle< groupCols, swDirection > +, cutlass::gemm::IdentityBlockSwizzle +, cutlass::gemm::RowMajorBlockSwizzle< groupRows, swDirection > +
  • +
  • get_pointer_offset() +: cutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > +
  • +
  • get_ptr() +: cutlass::detail::ScalarOrPointer< Scalar_ > +
  • +
  • get_scalar() +: cutlass::detail::ScalarOrPointer< Scalar_ > +
  • +
  • get_threadblock_offset() +: cutlass::gemm::ColumnMajorBlockSwizzle< groupCols, swDirection > +, cutlass::gemm::IdentityBlockSwizzle +, cutlass::gemm::RowMajorBlockSwizzle< groupRows, swDirection >
  • GlobalLoadStream() -: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream -, cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > +: cutlass::gemm::GlobalLoadStream< Operand, LoadIterator_, StoreIterator_, Transformer_ >
  • -
  • GlobalLoadStreamBase() -: cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > +
  • GlobalLoadStreamPair() +: cutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >
  • good() -: cutlass::TensorRef< Storage_, Rank_ > -, cutlass::TensorView< T > +: cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > +, cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
  • diff --git a/docs/functions_func_h.html b/docs/functions_func_h.html index 7eb85aac8..184c449e2 100644 --- a/docs/functions_func_h.html +++ b/docs/functions_func_h.html @@ -71,14 +71,23 @@ $(function() {  

    - h -

    diff --git a/docs/functions_func_i.html b/docs/functions_func_i.html index 16cfdc518..7c8680aba 100644 --- a/docs/functions_func_i.html +++ b/docs/functions_func_i.html @@ -72,90 +72,103 @@ $(function() {

    - i -

    diff --git a/docs/functions_func_k.html b/docs/functions_func_k.html new file mode 100644 index 000000000..ced1e9b63 --- /dev/null +++ b/docs/functions_func_k.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
    +
    +

    @@ -103,7 +103,7 @@ Namespaces

    + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    + + + + + + + + + + +
    +
    + + +
    + +
    + +
    +  + +

    - k -

    +
    + + + + diff --git a/docs/functions_func_l.html b/docs/functions_func_l.html index c76f9fc53..76d84a054 100644 --- a/docs/functions_func_l.html +++ b/docs/functions_func_l.html @@ -74,30 +74,56 @@ $(function() {
  • launch() : cutlass::gemm::Gemm< GemmTraits_ >
  • +
  • Launch() +: cutlass::gemm::Launch< Gemm, WithLaunchBounds > +, cutlass::gemm::Launch< Gemm, false > +
  • leading_dim() -: cutlass::TensorRef< Storage_, Rank_ > +: cutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ > +, cutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
  • LinearScaling() -: cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > +: cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > +
  • +
  • LinearScalingDevicePtr() +: cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >
  • load() -: cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > -, cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > -, cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t > -, cutlass::Load< double, 2, Memory_, true, 16 > -, cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 > -, cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 > -, cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 > -, cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > +: cutlass::Load< Scalar_, kAccessSize, Memory_, kFragmentElementType, FragmentElement_, kStride, size > +, cutlass::Load< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 > +, cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 > +, cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 > +, cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 > +, cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 > +, cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > +, cutlass::Load< Vector< bin1_t, 32 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > +, cutlass::Load< Vector< int4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > +, cutlass::Load< Vector< uint4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size > +, cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ > +, cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ > +, 
cutlass::ZipTileIterator< First_, Second_ > +
  • +
  • load_element() +: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > +, cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > +, cutlass::gemm::IgemmGlobalIteratorAb< TileTraits_, Index_ > +, cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > +, cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ > +, cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ >
  • load_post_increment() -: cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > +: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > +, cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > +, cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > +, cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ > +, cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ > +, cutlass::ZipTileIterator< First_, Second_ >
  • diff --git a/docs/functions_func_m.html b/docs/functions_func_m.html index 2c68ec4f3..97f286560 100644 --- a/docs/functions_func_m.html +++ b/docs/functions_func_m.html @@ -71,23 +71,38 @@ $(function() {  

    - m -

    diff --git a/docs/functions_func_n.html b/docs/functions_func_n.html new file mode 100644 index 000000000..3b16224a8 --- /dev/null +++ b/docs/functions_func_n.html @@ -0,0 +1,89 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +  + +

    - n -

    +
    + + + + diff --git a/docs/functions_func_o.html b/docs/functions_func_o.html index fb7b39f73..a7c0b04d3 100644 --- a/docs/functions_func_o.html +++ b/docs/functions_func_o.html @@ -72,8 +72,12 @@ $(function() {

    - o -

    diff --git a/docs/functions_type_f.html b/docs/functions_type_f.html index a71defeb3..8ba4932b8 100644 --- a/docs/functions_type_f.html +++ b/docs/functions_type_f.html @@ -72,45 +72,59 @@ $(function() {

    - f -

    diff --git a/docs/functions_type_g.html b/docs/functions_type_g.html index 4ae366f31..05c838d38 100644 --- a/docs/functions_type_g.html +++ b/docs/functions_type_g.html @@ -73,19 +73,19 @@ $(function() {

    - g -

    diff --git a/docs/functions_type_k.html b/docs/functions_type_k.html new file mode 100644 index 000000000..b5e5add55 --- /dev/null +++ b/docs/functions_type_k.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + + + + + + diff --git a/docs/functions_type_l.html b/docs/functions_type_l.html index 2e7334f0d..abda026b1 100644 --- a/docs/functions_type_l.html +++ b/docs/functions_type_l.html @@ -72,13 +72,19 @@ $(function() {

    - l -

    diff --git a/docs/functions_type_m.html b/docs/functions_type_m.html index 043340a51..c76395492 100644 --- a/docs/functions_type_m.html +++ b/docs/functions_type_m.html @@ -71,26 +71,33 @@ $(function() {  

    - m -

    diff --git a/docs/functions_type_n.html b/docs/functions_type_n.html index bb5ad36c7..187630cfd 100644 --- a/docs/functions_type_n.html +++ b/docs/functions_type_n.html @@ -78,7 +78,7 @@ $(function() { diff --git a/docs/functions_type_o.html b/docs/functions_type_o.html index 42ed28139..d3d71d551 100644 --- a/docs/functions_type_o.html +++ b/docs/functions_type_o.html @@ -72,7 +72,7 @@ $(function() {

    - o -

    struct  cutlass::Fragment< Element_, kElements_, kAlignment_ >
     A template defining Fragment Concept. More...
     
    struct  cutlass::ZipFragment< First_, Second_ >
     A template defining Fragment Concept. More...
     

    Detailed Description

    Fragment Concept is a statically sized array for storing parts of tiles held by individual CUDA threads.

    @@ -94,7 +97,7 @@ Classes
    - +

    Classes

    struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ >
     An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
     
    @@ -96,7 +96,7 @@ Classes
    - +

    Classes

    struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ >
     An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
     
    @@ -96,7 +96,7 @@ Classes
    - +

    Classes

    struct  cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >
    struct  cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_, AccessSize >
     A template defining Tile Traits Concept. More...
     
    @@ -93,7 +93,7 @@ Classes
    @@ -107,7 +107,7 @@ Namespaces diff --git a/docs/hgemm__global__tile_8h_source.html b/docs/hgemm__global__tile_8h_source.html index bdd647d1a..8d7e02f65 100644 --- a/docs/hgemm__global__tile_8h_source.html +++ b/docs/hgemm__global__tile_8h_source.html @@ -76,34 +76,34 @@ $(function() {
    hgemm_global_tile.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    30 #pragma once
    31 
    32 #include <cutlass/coord.h>
    34 #include <cutlass/matrix_traits.h>
    35 #include <cutlass/reshape_tile.h>
    36 
    37 namespace cutlass {
    38 namespace gemm {
    39 
    41 
    42 template <GemmOperand::Kind kOperand_,
    43  MatrixLayout::Kind kLayout_,
    44  typename Scalar_,
    45  typename Tile_,
    46  typename Threads_,
    47  int kAccessSize_>
    49  // Which GEMM operand?
    50  kOperand_,
    51  // The layout.
    52  kLayout_,
    53  // The scalar.
    54  Scalar_,
    55  // The tile.
    56  Tile_,
    57  // The threads.
    58  Threads_,
    59  // The number of scalars per LDG/STG.
    60  kAccessSize_> {
    64  typedef typename Base::Threads Threads;
    70  typedef Shape<Base::Tile::kH / Base::Threads::kH / 2,
    71  2,
    72  Base::Tile::kW / Base::Threads::kW,
    73  Base::Tile::kC / Base::kAccessSize>
    76  struct ThreadOffset {
    78  Coord<4> operator()() const {
    79  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
    80  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
    81 
    82  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
    83  }
    84  };
    85 };
    86 
    88 
    89 } // namespace gemm
    90 } // namespace cutlass
    Definition: convert.h:33
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    30 #pragma once
    31 
    32 #include "cutlass/coord.h"
    34 #include "cutlass/matrix_traits.h"
    35 #include "cutlass/reshape_tile.h"
    36 
    37 namespace cutlass {
    38 namespace gemm {
    39 
    41 
    42 template <GemmOperand::Kind kOperand_,
    43  MatrixLayout::Kind kLayout_,
    44  typename Scalar_,
    45  typename Tile_,
    46  typename Threads_,
    47  int kAccessSize_>
    49  // Which GEMM operand?
    50  kOperand_,
    51  // The layout.
    52  kLayout_,
    53  // The scalar.
    54  Scalar_,
    55  // The tile.
    56  Tile_,
    57  // The threads.
    58  Threads_,
    59  // The number of scalars per LDG/STG.
    60  kAccessSize_> {
    64  typedef typename Base::Threads Threads;
    70  typedef Shape<Base::VectorizedTile::kH / Base::Threads::kH / 2,
    71  2,
    72  Base::VectorizedTile::kW / Base::Threads::kW,
    73  Base::VectorizedTile::kC / Base::kAccessSize>
    76  struct ThreadOffset {
    78  Coord<4> operator()() const {
    79  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
    80  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
    81 
    82  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
    83  }
    84  };
    85 };
    86 
    88 
    89 } // namespace gemm
    90 } // namespace cutlass
    Shape< Base::VectorizedTile::kH/Base::Threads::kH/2, 2, Base::VectorizedTile::kW/Base::Threads::kW, Base::VectorizedTile::kC/Base::kAccessSize > Iterations
    The number of iterations needed to load/store the tile.
    Definition: hgemm_global_tile.h:74
    +
    Definition: convert.h:33
    Defines iterators for efficiently loading and storing to global memory.
    Definition: gemm_global_tile.h:70
    A Coord is a coordinate of arbitrary rank into a tensor or matrix.
    -
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 2-element coordinate.
    Definition: coord.h:241
    -
    Shape< Base::Tile::kH/Base::Threads::kH/2, 2, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
    The number of iterations needed to load/store the tile.
    Definition: hgemm_global_tile.h:74
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 2-element coordinate.
    Definition: coord.h:318
    +
    Shape< 1, 2, Base::VectorizedTile::kC > ThreadsDelta
    The threads strides.
    Definition: hgemm_global_tile.h:66
    Base::Threads Threads
    The threads.
    Definition: hgemm_global_tile.h:64
    static int const kH
    The height of the cube.
    Definition: shape.h:68
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Definition: hgemm_global_tile.h:78
    Shape< Base::Threads::kH *2, 1, Base::Threads::kW, Base::kAccessSize > Delta
    The strides in each dimension between different loads/stores.
    Definition: hgemm_global_tile.h:68
    -
    Shape< 1, 2, Base::Tile::kC > ThreadsDelta
    The threads strides.
    Definition: hgemm_global_tile.h:66
    Defines a type for restructuring a tile.
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
    The base class.
    Definition: hgemm_global_tile.h:62
    Definition: hgemm_global_tile.h:48
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    ReshapeThreads< VectorizedTile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:88
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    -
    Kind
    Definition: matrix_traits.h:36
    +
    Kind
    Enumeration defining fundamental contiguous layouts.
    Definition: matrix_traits.h:159
    static int const kAccessSize
    The number of scalars per LDG/STG.
    Definition: gemm_global_tile.h:80
    Computes the thread offset in (H, W) based on thread ID.
    Definition: hgemm_global_tile.h:76
    -
    Kind
    Definition: matrix_traits.h:43
    -
    ReshapeThreads< Tile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:87
    +
    Kind
    Definition: matrix_traits.h:357
    Defines properties of matrices used to denote layout and operands to GEMM kernels.
    diff --git a/docs/hgemm__multiply__add_8h.html b/docs/hgemm__multiply__add_8h.html index 3c6c609e8..41ba8db9e 100644 --- a/docs/hgemm__multiply__add_8h.html +++ b/docs/hgemm__multiply__add_8h.html @@ -82,15 +82,15 @@ $(function() {

    Specialization implementing multiply-add operation on half-precision floating point fragments. More...

    -
    - - + +

    Classes

    struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
     Template performing matrix multiply-add operation within a thread. More...
    struct  cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, half >
     Template performing matrix multiply-add operation within a thread. More...
     
    diff --git a/docs/hgemm__multiply__add_8h_source.html b/docs/hgemm__multiply__add_8h_source.html index 73ef90409..40e849bfb 100644 --- a/docs/hgemm__multiply__add_8h_source.html +++ b/docs/hgemm__multiply__add_8h_source.html @@ -76,30 +76,31 @@ $(function() {
    hgemm_multiply_add.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/fragment.h>
    32 
    34 
    35 namespace cutlass {
    36 namespace gemm {
    37 
    39 
    41 template <typename AccumulatorsPerThread_, typename ThreadsPerWarp_>
    42 struct ThreadMultiplyAdd<AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half> {
    46  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
    48  typedef ThreadsPerWarp_ ThreadsPerWarp;
    52  typedef half ScalarA;
    56  typedef half ScalarB;
    60  typedef half ScalarC;
    63 
    65  static_assert(AccumulatorsPerThread::kH % 2 == 0, "Invalid size");
    66  static_assert(AccumulatorsPerThread::kW % 2 == 0, "Invalid size");
    67 
    69  CUTLASS_DEVICE ThreadMultiplyAdd() {}
    70 
    72  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    73  FragmentB const& b,
    74  Accumulators const& c,
    75  Accumulators& d) {
    76 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
    77  // The inputs.
    78  __half2 const* a_half2 = reinterpret_cast<__half2 const*>(&a[0]);
    79  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
    80  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
    81 
    82  // The output.
    83  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
    84 
    85  for (int j = 0; j < AccumulatorsPerThread::kH / 2; ++j) {
    86  for (int i = 0; i < AccumulatorsPerThread::kW / 2; ++i) {
    87  // The offsets in the output fragment.
    88  int const k0 = (2 * j + 0) * (AccumulatorsPerThread::kW / 2) + i;
    89  int const k1 = (2 * j + 1) * (AccumulatorsPerThread::kW / 2) + i;
    90 
    91  // Compute the product a[i] * b[j].H0_H0.
    92  d_half2[k0] = __hfma2(a_half2[i], __low2half2(b_half2[j]), c_half2[k0]);
    93  // Compute the product a[i] * b[j].H1_H1.
    94  d_half2[k1] = __hfma2(a_half2[i], __high2half2(b_half2[j]), c_half2[k1]);
    95  }
    96  }
    97 #endif
    98  }
    99 };
    100 
    102 
    103 } // namespace gemm
    104 } // namespace cutlass
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include "cutlass/fragment.h"
    32 
    34 
    35 namespace cutlass {
    36 namespace gemm {
    37 
    39 
    41 template <typename ThreadGemmShape_, typename ThreadsPerWarp_>
    42 struct ThreadMultiplyAdd<ThreadGemmShape_, ThreadsPerWarp_, half, half, half> {
    46  typedef ThreadGemmShape_ ThreadGemmShape;
    50  typedef ThreadsPerWarp_ ThreadsPerWarp;
    54  typedef half ScalarA;
    58  typedef half ScalarB;
    62  typedef half ScalarC;
    65 
    67  static_assert(AccumulatorsPerThread::kH % 2 == 0, "Invalid size");
    68  static_assert(AccumulatorsPerThread::kW % 2 == 0, "Invalid size");
    69 
    71  CUTLASS_DEVICE ThreadMultiplyAdd() {}
    72 
    74  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    75  FragmentB const& b,
    76  Accumulators const& c,
    77  Accumulators& d) {
    78 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
    79  // The inputs.
    80  __half2 const* a_half2 = reinterpret_cast<__half2 const*>(&a[0]);
    81  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
    82  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
    83 
    84  // The output.
    85  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
    86 
    87  for (int j = 0; j < AccumulatorsPerThread::kH / 2; ++j) {
    88  for (int i = 0; i < AccumulatorsPerThread::kW / 2; ++i) {
    89  // The offsets in the output fragment.
    90  int const k0 = (2 * j + 0) * (AccumulatorsPerThread::kW / 2) + i;
    91  int const k1 = (2 * j + 1) * (AccumulatorsPerThread::kW / 2) + i;
    92 
    93  // Compute the product a[i] * b[j].low.
    94  d_half2[k0] = __hfma2(a_half2[i], __low2half2(b_half2[j]), c_half2[k0]);
    95  // Compute the product a[i] * b[j].high.
    96  d_half2[k1] = __hfma2(a_half2[i], __high2half2(b_half2[j]), c_half2[k1]);
    97  }
    98  }
    99 #endif
    100  }
    101 };
    102 
    104 
    105 } // namespace gemm
    106 } // namespace cutlass
    CUTLASS_DEVICE ThreadMultiplyAdd()
    Make sure there&#39;s an even number of elements in both dimensions.
    Definition: hgemm_multiply_add.h:71
    +
    half ScalarC
    The type for C and D.
    Definition: hgemm_multiply_add.h:62
    Definition: convert.h:33
    -
    Fragment< half, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
    The accumulators.
    Definition: hgemm_multiply_add.h:62
    -
    ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
    The number of accumulators per warp.
    Definition: hgemm_multiply_add.h:50
    -
    half ScalarC
    The type for C and D.
    Definition: hgemm_multiply_add.h:60
    -
    CUTLASS_DEVICE ThreadMultiplyAdd()
    Make sure there&#39;s an even number of elements in both dimensions.
    Definition: hgemm_multiply_add.h:69
    +
    Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
    The fragment for B.
    Definition: hgemm_multiply_add.h:60
    +
    ThreadGemmShape_ ThreadGemmShape
    The number of accumulators per thread.
    Definition: hgemm_multiply_add.h:46
    +
    Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
    Definition: shape.h:119
    A template defining Fragment Concept.
    Definition: fragment.h:99
    Template implementing matrix multiply-add operations on fragments.
    -
    Shape< 1, 1, 2, 1 > InstructionShape
    The shape of the instruction.
    Definition: hgemm_multiply_add.h:44
    - -
    ThreadsPerWarp_ ThreadsPerWarp
    The number of threads per warp.
    Definition: hgemm_multiply_add.h:48
    -
    AccumulatorsPerThread_ AccumulatorsPerThread
    The number of accumulators per thread.
    Definition: hgemm_multiply_add.h:46
    -
    #define static_assert(__e, __m)
    Definition: platform.h:145
    -
    CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
    Multiply : d = a*b + c.
    Definition: hgemm_multiply_add.h:72
    +
    Shape< 1, 1, 2, 1 > InstructionShape
    The shape of the instruction.
    Definition: hgemm_multiply_add.h:44
    +
    ShapeMul< ThreadGemmShape, ThreadsPerWarp >::Shape AccumulatorsPerWarp
    The number of accumulators per warp.
    Definition: hgemm_multiply_add.h:52
    +
    Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
    The fragment for A.
    Definition: hgemm_multiply_add.h:56
    +
    Fragment< half, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
    The accumulators.
    Definition: hgemm_multiply_add.h:64
    +
    CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
    Multiply : d = a*b + c.
    Definition: hgemm_multiply_add.h:74
    + +
    #define static_assert(__e, __m)
    Definition: platform.h:153
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    -
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:43
    +
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:44
    +
    ThreadGemmShape AccumulatorsPerThread
    Aliased for compatibility. Will be removed for CUTLASS v2.0.
    Definition: hgemm_multiply_add.h:48
    +
    ThreadsPerWarp_ ThreadsPerWarp
    The number of threads per warp.
    Definition: hgemm_multiply_add.h:50
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    -
    Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
    The fragment for A.
    Definition: hgemm_multiply_add.h:54
    -
    Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
    The fragment for B.
    Definition: hgemm_multiply_add.h:58
    diff --git a/docs/hgemm__swizzle_8h.html b/docs/hgemm__swizzle_8h.html index aef7ac75e..93938799b 100644 --- a/docs/hgemm__swizzle_8h.html +++ b/docs/hgemm__swizzle_8h.html @@ -83,7 +83,7 @@ $(function() {

    Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands. More...

    #include <cuda_fp16.h>
    -#include <cutlass/fragment.h>
    +#include "cutlass/fragment.h"

    Go to the source code of this file.

    @@ -103,7 +103,7 @@ Namespaces

    @@ -102,7 +102,7 @@ Namespaces diff --git a/docs/hgemm__swizzle_8h_source.html b/docs/hgemm__swizzle_8h_source.html index bb76b510c..d882c10f3 100644 --- a/docs/hgemm__swizzle_8h_source.html +++ b/docs/hgemm__swizzle_8h_source.html @@ -76,14 +76,14 @@ $(function() {
    hgemm_swizzle.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cuda_fp16.h>
    32 #include <cutlass/fragment.h>
    33 
    34 namespace cutlass {
    35 namespace gemm {
    36 
    38 
    39 template <typename GlobalIterator_>
    40 struct HgemmSwizzle {
    42  typedef GlobalIterator_ GlobalIterator;
    44  typedef typename GlobalIterator::Fragment Fragment;
    46  typedef typename GlobalIterator::FragmentShape FragmentShape;
    47 
    52 
    55 
    57  static_assert(FragmentShape::kH == 2 && ShapeCount<FragmentShape>::kWc == 2, "Not multiple of 2");
    58 
    60  CUTLASS_DEVICE HgemmSwizzle() {}
    61 
    63  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
    64  // Expose src/dst as int arrays.
    65  int const* src_int = reinterpret_cast<int const*>(&src[0]);
    66  int* dst_int = reinterpret_cast<int*>(&dst[0]);
    67 
    68  // Transpose the data.
    69  for (int d = 0; d < FragmentShape::kD; ++d) {
    70  // The indices to read two consecutive "rows".
    71  int const i0 = 2 * d + 0;
    72  int const i1 = 2 * d + 1;
    73 
    74  int a0 = src_int[i0];
    75  int a1 = src_int[i1];
    76 
    77  int b0, b1;
    78  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(a0), "r"(a1));
    79  asm volatile("prmt.b32 %0, %1, %2, 0x7632;" : "=r"(b1) : "r"(a0), "r"(a1));
    80 
    81  // The indices to store with "strides".
    82  int const j0 = 0 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
    83  int const j1 = 1 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
    84 
    85  dst_int[j0] = b0;
    86  dst_int[j1] = b1;
    87  }
    88  }
    89 };
    90 
    92 
    93 } // namespace gemm
    94 } // namespace cutlass
    GlobalIterator_ GlobalIterator
    The global iterator.
    Definition: hgemm_swizzle.h:42
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cuda_fp16.h>
    32 #include "cutlass/fragment.h"
    33 
    34 namespace cutlass {
    35 namespace gemm {
    36 
    38 
    39 template <typename GlobalIterator_>
    40 struct HgemmSwizzle {
    42  typedef GlobalIterator_ GlobalIterator;
    44  typedef typename GlobalIterator::Fragment Fragment;
    46  typedef typename GlobalIterator::FragmentShape FragmentShape;
    47 
    52 
    55 
    57  static_assert(FragmentShape::kH == 2 && ShapeCount<FragmentShape>::kWc == 2, "Not multiple of 2");
    58 
    60  CUTLASS_DEVICE HgemmSwizzle() {}
    61 
    63  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
    64  // Expose src/dst as int arrays.
    65  int const* src_int = reinterpret_cast<int const*>(&src[0]);
    66  int* dst_int = reinterpret_cast<int*>(&dst[0]);
    67 
    68  // Transpose the data.
    69  for (int d = 0; d < FragmentShape::kD; ++d) {
    70  // The indices to read two consecutive "rows".
    71  int const i0 = 2 * d + 0;
    72  int const i1 = 2 * d + 1;
    73 
    74  int a0 = src_int[i0];
    75  int a1 = src_int[i1];
    76 
    77  int b0, b1;
    78  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(a0), "r"(a1));
    79  asm volatile("prmt.b32 %0, %1, %2, 0x7632;" : "=r"(b1) : "r"(a0), "r"(a1));
    80 
    81  // The indices to store with "strides".
    82  int const j0 = 0 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
    83  int const j1 = 1 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
    84 
    85  dst_int[j0] = b0;
    86  dst_int[j1] = b1;
    87  }
    88  }
    89 };
    90 
    92 
    93 } // namespace gemm
    94 } // namespace cutlass
    GlobalIterator_ GlobalIterator
    The global iterator.
    Definition: hgemm_swizzle.h:42
    Definition: convert.h:33
    -
    std::is_same (false specialization)
    Definition: platform.h:412
    +
    std::is_same (false specialization)
    Definition: platform.h:420
    CUTLASS_DEVICE HgemmSwizzle()
    The src/dst must be half fragments.
    Definition: hgemm_swizzle.h:60
    CUTLASS_DEVICE void transform(Fragment const &src, Fragment &dst)
    Transform a fragment.
    Definition: hgemm_swizzle.h:63
    Fragment InputFragment
    The input fragment.
    Definition: hgemm_swizzle.h:49
    Fragment OutputFragment
    The output fragment.
    Definition: hgemm_swizzle.h:51
    -
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    #define static_assert(__e, __m)
    Definition: platform.h:153
    GlobalIterator::Fragment Fragment
    The source fragment.
    Definition: hgemm_swizzle.h:44
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    GlobalIterator::FragmentShape FragmentShape
    The shape of the source fragment.
    Definition: hgemm_swizzle.h:46
    @@ -92,7 +92,7 @@ $(function() {
    diff --git a/docs/hgemm__traits_8h.html b/docs/hgemm__traits_8h.html index 283ceb750..bb8e72d99 100644 --- a/docs/hgemm__traits_8h.html +++ b/docs/hgemm__traits_8h.html @@ -82,23 +82,23 @@ $(function() {

    Defies structural properties of half-precision GEMM computation. More...

    -
    - + @@ -120,9 +120,9 @@ Classes - + - +

    Classes

    struct  cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >
    struct  cutlass::gemm::HgemmConfig< OutputTile_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_ >
     
    struct  cutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ >
     
     
    struct  cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    struct  cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
     
    struct  cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >
    struct  cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >
     
    diff --git a/docs/hgemm__traits_8h_source.html b/docs/hgemm__traits_8h_source.html index 0d12493ec..db1554c86 100644 --- a/docs/hgemm__traits_8h_source.html +++ b/docs/hgemm__traits_8h_source.html @@ -76,89 +76,87 @@ $(function() {
    hgemm_traits.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/convert.h>
    31 #include <cutlass/reshape_tile.h>
    32 
    33 #include <cutlass/gemm/gemm.h>
    42 
    43 namespace cutlass {
    44 namespace gemm {
    45 
    47 
    48 template <
    50  typename OutputTile_,
    52  typename AccumulatorsPerThread_,
    54  int kScalarsPerLdgA_ = 2,
    56  int kScalarsPerLdgB_ = 2>
    58  : public GemmConfig<
    60  half,
    62  half,
    64  half,
    66  half,
    68  OutputTile_,
    70  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, half, half, half>,
    72  kScalarsPerLdgA_,
    74  kScalarsPerLdgA_,
    76  8,
    78  kScalarsPerLdgB_,
    80  kScalarsPerLdgB_,
    82  8,
    84  2,
    86  8,
    88  2,
    90  2> {};
    91 
    93 
    94 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    96 
    97 template <typename Iterator_>
    98 struct HgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
    100 };
    101 
    102 template <typename Iterator_>
    103 struct HgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
    105 };
    106 
    108 
    109 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    111 
    112 template <typename Iterator_>
    113 struct HgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
    115 };
    116 
    117 template <typename Iterator_>
    118 struct HgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
    120 };
    121 
    123 
    124 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    125 struct HgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
    126 
    128 
    129 template <typename GemmConfig_>
    130 struct HgemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_>
    131  : public GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
    134 
    138  // The layout.
    140  // The pointer.
    141  half const,
    142  // The tile has size MxK in GEMM's terminology.
    144  // The threads are distributed as (threads / K ) x K (the traits may reorganize).
    145  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    146  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
    147  GemmConfig_::kScalarsPerLdgA>
    149 
    152  // The pointer.
    153  half,
    154  // The tile has size KxM in GEMM's terminology.
    155  Shape<GemmConfig_::kStages,
    156  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
    157  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
    158  // The threads are distributed as warps x 32(the traits may reorganize).
    159  typename GlobalTileTraits::Threads,
    160  // The number of scalars per STS (STS.32 or STS.128, etc).
    161  2,
    162  // The skew to avoid bank conflicts added in the tile W dimension.
    163  128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2>
    165 
    168  // The pointer.
    169  half const,
    170  // The output tile size.
    171  typename GemmConfig_::OutputTile,
    172  // The number of warps.
    173  typename GemmConfig_::Warps,
    174  // The number of threads per warp.
    175  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
    176  // The shape of the FMA instruction.
    177  typename GemmConfig_::InstructionShape,
    178  // The number of stages.
    179  GemmConfig_::kStages,
    180  // The number of scalars per LDS.
    181  8,
    182  // The skew.
    183  SharedStoreTileTraits::kSkew>
    185 };
    186 
    188 
    189 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    190 struct HgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
    191 
    193 
    194 template <typename GemmConfig_>
    195 struct HgemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_>
    196  : public GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
    199 
    203  // The layout.
    205  // The pointer.
    206  half const,
    207  // The tile has size KxN in GEMM's terminology.
    209  // The threads are distributed as (threads / K) x K (the traits may reorganize).
    210  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    211  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
    212  GemmConfig_::kScalarsPerLdgB>
    214 
    217  // The pointer.
    218  half,
    219  // The tile has size KxN in GEMM's terminology.
    220  Shape<GemmConfig_::kStages,
    221  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
    222  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
    223  // The threads are distributed as (threads / K) x K (the traits may reorganize).
    224  typename GlobalTileTraits::Threads,
    225  // The number of scalars per STS (STS.32 or STS.128, etc).
    226  2,
    227  // The skew to avoid bank conflicts added in the tile W dimension.
    228  128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2>
    230 
    233  // The pointer.
    234  half const,
    235  // The output tile size.
    236  typename GemmConfig_::OutputTile,
    237  // The number of warps.
    238  typename GemmConfig_::Warps,
    239  // The number of threads per warp.
    240  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
    241  // The shape of the FMA instruction.
    242  typename GemmConfig_::InstructionShape,
    243  // The number of stages.
    244  GemmConfig_::kStages,
    245  // The number of scalars per LDS.
    246  8,
    247  // The skew.
    248  SharedStoreTileTraits::kSkew>
    250 };
    251 
    253 
    254 template <
    256  MatrixLayout::Kind kLayoutA_,
    258  MatrixLayout::Kind kLayoutB_,
    260  typename OutputTile_,
    262  typename EpilogueFunctor_,
    264  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
    266  int kScalarsPerLdgA_ = 2,
    268  int kScalarsPerLdgB_ = 2,
    270  typename Index_ = int>
    279 
    284  typedef typename HgemmTransformerA<GemmTileTraitsHelperA::kLayout,
    287  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
    288  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
    295 
    299  // The default transformer for B.
    300  typedef typename HgemmTransformerB<GemmTileTraitsHelperB::kLayout,
    303  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
    304  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
    311 
    313  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
    314  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
    321  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
    322  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
    328 
    333 
    338 };
    339 
    341 
    342 template <
    344  MatrixLayout::Kind kLayoutA_,
    346  MatrixLayout::Kind kLayoutB_,
    348  typename OutputTile_ = Shape<8, 128, 128>,
    350  typename EpilogueFunctor_ = LinearScaling<half>,
    352  typename AccumulatorsPerThread_ = Shape<8, 8, 16>,
    354  int kScalarsPerLdgA_ = 2,
    356  int kScalarsPerLdgB_ = 2,
    358  typename Index_ = int,
    360  typename Helper_ = HgemmTraitsHelper<kLayoutA_,
    361  kLayoutB_,
    362  OutputTile_,
    363  EpilogueFunctor_,
    364  AccumulatorsPerThread_,
    365  kScalarsPerLdgA_,
    366  kScalarsPerLdgB_,
    367  Index_> >
    368 struct HgemmTraits : public GemmTraits<
    369  // The config.
    370  typename Helper_::GemmConfig,
    371  // The stream to load A from global memory to shared memory.
    372  typename Helper_::GlobalLoadStreamA,
    373  // The stream to load B from global memory to shared memory.
    374  typename Helper_::GlobalLoadStreamB,
    375  // The stream to load A from shared memory.
    376  typename Helper_::SharedLoadStreamA,
    377  // The stream to load B from shared memory.
    378  typename Helper_::SharedLoadStreamB,
    379  // The epilogue.
    380  typename Helper_::Epilogue,
    381  // The block swizzle to reorganize the grid.
    382  IdentityBlockSwizzle,
    383  // The index.
    384  Index_,
    385  // The tool used to clear accumulators.
    386  typename Helper_::ClearAccumulators> {};
    387 
    389 
    390 } // namespace gemm
    391 } // namespace cutlass
    GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
    The iterator to load A from global memory.
    Definition: hgemm_traits.h:282
    -
    Definition: load_store.h:42
    -
    HgemmSwizzle< Iterator_ > Transformer
    Definition: hgemm_traits.h:119
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include "cutlass/convert.h"
    31 #include "cutlass/reshape_tile.h"
    32 
    33 #include "cutlass/gemm/gemm.h"
    42 
    43 namespace cutlass {
    44 namespace gemm {
    45 
    47 
    48 template <
    50  typename OutputTile_,
    52  typename ThreadGemmShape_,
    54  int kScalarsPerLdgA_ = 2,
    56  int kScalarsPerLdgB_ = 2>
    57 struct HgemmConfig : public GemmConfig<
    59  half,
    61  half,
    63  half,
    65  half,
    67  OutputTile_,
    69  ThreadMultiplyAdd<ThreadGemmShape_, Shape<1, 4, 8>, half, half, half>,
    71  kScalarsPerLdgA_,
    73  kScalarsPerLdgA_,
    75  8,
    77  kScalarsPerLdgB_,
    79  kScalarsPerLdgB_,
    81  8,
    83  2,
    85  8,
    87  2,
    89  2,
    91  false,
    93  true,
    95  false
    96  > {};
    97 
    99 
    100 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    102 
    103 template <typename Iterator_>
    104 struct HgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
    106 };
    107 
    108 template <typename Iterator_>
    109 struct HgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
    111 };
    112 
    114 
    115 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    117 
    118 template <typename Iterator_>
    119 struct HgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
    121 };
    122 
    123 template <typename Iterator_>
    124 struct HgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
    126 };
    127 
    129 
    130 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    131 struct HgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
    132 
    134 
    135 template <typename GemmConfig_>
    136 struct HgemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_>
    137  : public GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
    140 
    144  // The layout.
    146  // The pointer.
    147  half const,
    148  // The tile has size MxK in GEMM's terminology.
    150  // The threads are distributed as (threads / K ) x K (the traits may reorganize).
    151  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    152  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
    153  GemmConfig_::kScalarsPerLdgA>
    155 
    156  static int const kSkewA = 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2;
    157 
    160  // The pointer.
    161  half,
    162  // The tile has size KxM in GEMM's terminology.
    163  Shape<GemmConfig_::kStages,
    164  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
    165  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
    166  // The threads are distributed as warps x 32(the traits may reorganize).
    167  typename GlobalTileTraits::Threads,
    168  // The number of scalars per STS (STS.32 or STS.128, etc).
    169  2,
    170  // The skew to avoid bank conflicts added in the tile W dimension.
    171  kSkewA<GemmConfig_::kScalarsPerLdsA ? GemmConfig_::kScalarsPerLdsA : kSkewA>
    172  SharedStoreTileTraits;
    173 
    176  // The pointer.
    177  half const,
    178  // The output tile size.
    179  typename GemmConfig_::OutputTile,
    180  // The number of warps.
    181  typename GemmConfig_::Warps,
    182  // The number of threads per warp.
    183  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
    184  // The shape of the FMA instruction.
    185  typename GemmConfig_::InstructionShape,
    186  // The number of stages.
    187  GemmConfig_::kStages,
    188  // The number of scalars per LDS.
    189  8,
    190  // The skew.
    191  SharedStoreTileTraits::kSkew>
    192  SharedLoadTileTraits;
    193 };
    194 
    196 
    197 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    198 struct HgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
    199 
    201 
    202 template <typename GemmConfig_>
    203 struct HgemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_>
    204  : public GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
    207 
    211  // The layout.
    213  // The pointer.
    214  half const,
    215  // The tile has size KxN in GEMM's terminology.
    217  // The threads are distributed as (threads / K) x K (the traits may reorganize).
    218  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    219  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
    220  GemmConfig_::kScalarsPerLdgB>
    222 
    223  static int const kSkewB = 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2;
    224 
    227  // The pointer.
    228  half,
    229  // The tile has size KxN in GEMM's terminology.
    230  Shape<GemmConfig_::kStages,
    231  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
    232  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
    233  // The threads are distributed as (threads / K) x K (the traits may reorganize).
    234  typename GlobalTileTraits::Threads,
    235  // The number of scalars per STS (STS.32 or STS.128, etc).
    236  2,
    237  // The skew to avoid bank conflicts added in the tile W dimension.
    238  kSkewB<GemmConfig_::kScalarsPerLdsB ? GemmConfig_::kScalarsPerLdsB : kSkewB>
    239  SharedStoreTileTraits;
    240 
    243  // The pointer.
    244  half const,
    245  // The output tile size.
    246  typename GemmConfig_::OutputTile,
    247  // The number of warps.
    248  typename GemmConfig_::Warps,
    249  // The number of threads per warp.
    250  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
    251  // The shape of the FMA instruction.
    252  typename GemmConfig_::InstructionShape,
    253  // The number of stages.
    254  GemmConfig_::kStages,
    255  // The number of scalars per LDS.
    256  8,
    257  // The skew.
    258  SharedStoreTileTraits::kSkew>
    259  SharedLoadTileTraits;
    260 };
    261 
    263 
    264 template <
    266  MatrixLayout::Kind kLayoutA_,
    268  MatrixLayout::Kind kLayoutB_,
    270  typename OutputTile_,
    272  typename EpilogueFunctor_,
    274  typename ThreadGemmShape_,
    276  int kScalarsPerLdgA_ = 2,
    278  int kScalarsPerLdgB_ = 2,
    280  typename Index_ = int>
    288 
    293  typedef typename HgemmTransformerA<GemmTileTraitsHelperA::kLayout,
    296  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
    297  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
    307 
    311  // The default transformer for B.
    312  typedef typename HgemmTransformerB<GemmTileTraitsHelperB::kLayout,
    315  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
    316  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
    326 
    328  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
    329  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
    336  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
    337  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
    343 
    348 
    353 };
    354 
    356 
    357 template <
    359  MatrixLayout::Kind kLayoutA_,
    361  MatrixLayout::Kind kLayoutB_,
    363  typename OutputTile_ = Shape<8, 128, 128>,
    365  typename EpilogueFunctor_ = LinearScaling<half>,
    367  typename ThreadGemmShape_ = Shape<8, 8, 16>,
    369  int kScalarsPerLdgA_ = 2,
    371  int kScalarsPerLdgB_ = 2,
    373  typename Index_ = int,
    375  typename Helper_ = HgemmTraitsHelper<kLayoutA_,
    376  kLayoutB_,
    377  OutputTile_,
    378  EpilogueFunctor_,
    379  ThreadGemmShape_,
    380  kScalarsPerLdgA_,
    381  kScalarsPerLdgB_,
    382  Index_> >
    383 struct HgemmTraits : public GemmTraits<
    384  // The config.
    385  typename Helper_::GemmConfig,
    386  // The stream to load A from global memory to shared memory.
    387  typename Helper_::GlobalLoadStreamA,
    388  // The stream to load B from global memory to shared memory.
    389  typename Helper_::GlobalLoadStreamB,
    390  // The stream to load A from shared memory.
    391  typename Helper_::SharedLoadStreamA,
    392  // The stream to load B from shared memory.
    393  typename Helper_::SharedLoadStreamB,
    394  // The epilogue.
    395  typename Helper_::Epilogue,
    396  // The block swizzle to reorganize the grid.
    397  IdentityBlockSwizzle,
    398  // The index.
    399  Index_,
    400  // The tool used to clear accumulators.
    401  typename Helper_::ClearAccumulators> {};
    402 
    404 
    405 } // namespace gemm
    406 } // namespace cutlass
    SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
    The stream to load B from shared memory.
    Definition: hgemm_traits.h:342
    +
    GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
    The iterator to load B from global memory.
    Definition: hgemm_traits.h:310
    +
    Definition: load_store.h:41
    +
    HgemmSwizzle< Iterator_ > Transformer
    Definition: hgemm_traits.h:125
    Definition: convert.h:33
    -
    Definition: gemm_shared_tile.h:129
    +
    HgemmConfig< OutputTile_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_ > GemmConfig
    The HGEMM config.
    Definition: hgemm_traits.h:283
    +
    Definition: gemm_shared_tile.h:128
    -
    Definition: gemm_epilogue.h:53
    +
    Definition: gemm_epilogue.h:42
    Defines iterators for efficiently loading and storing to global memory.
    -
    GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
    The iterator to load B from global memory.
    Definition: hgemm_traits.h:298
    -
    ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
    The object to clear accumulators.
    Definition: hgemm_traits.h:332
    +
    SimplifiedGemmEpilogueTraits< GemmConfig, EpilogueFunctor_, Index_ > GemmEpilogueTraits
    The traits class for the epilogue.
    Definition: hgemm_traits.h:350
    Defines structural properties of complete GEMM computation.
    -
    TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
    The iterator to store A to shared memory.
    Definition: hgemm_traits.h:291
    -
    GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
    The stream to load A from global memory to shared memory.
    Definition: hgemm_traits.h:294
    -
    HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for B^N.
    Definition: hgemm_traits.h:213
    -
    Definition: hgemm_traits.h:95
    -
    GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Base
    The base config.
    Definition: hgemm_traits.h:198
    -
    SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
    The stream to load A from shared memory.
    Definition: hgemm_traits.h:319
    -
    Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
    Definition: hgemm_traits.h:99
    -
    Definition: hgemm_traits.h:368
    -
    HgemmSwizzle< Iterator_ > Transformer
    Definition: hgemm_traits.h:104
    -
    Definition: tile_iterator.h:62
    -
    Definition: gemm_shared_tile.h:198
    -
    TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
    The iterator to load B from shared memory.
    Definition: hgemm_traits.h:325
    -
    Definition: gemm_global_tile.h:159
    -
    GemmEpilogue< GemmEpilogueTraits > Epilogue
    The epilogue.
    Definition: hgemm_traits.h:337
    -
    HgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
    The default transformer for A.
    Definition: hgemm_traits.h:285
    +
    HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for B^N.
    Definition: hgemm_traits.h:221
    +
    Definition: hgemm_traits.h:101
    +
    GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Base
    The base config.
    Definition: hgemm_traits.h:206
    +
    GemmEpilogue< GemmEpilogueTraits > Epilogue
    The epilogue.
    Definition: hgemm_traits.h:352
    +
    Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
    Definition: hgemm_traits.h:105
    +
    GlobalLoadStream< GemmOperand::kA, GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
    The stream to load A from global memory to shared memory.
    Definition: hgemm_traits.h:306
    +
    Definition: hgemm_traits.h:383
    +
    HgemmSwizzle< Iterator_ > Transformer
    Definition: hgemm_traits.h:110
    +
    TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
    The iterator to load B from shared memory.
    Definition: hgemm_traits.h:340
    +
    HgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
    The default transformer for A.
    Definition: hgemm_traits.h:294
    +
    Definition: tile_iterator.h:65
    +
    Definition: gemm_shared_tile.h:200
    +
    Definition: gemm_global_tile.h:163
    Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
    -
    Definition: gemm_global_stream.h:161
    -
    Definition: gemm_traits.h:273
    -
    Definition: hgemm_traits.h:125
    -
    Describes layouts of matrices.
    Definition: matrix_traits.h:35
    -
    SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
    The stream to load B from shared memory.
    Definition: hgemm_traits.h:327
    -
    Definition: hgemm_traits.h:110
    -
    GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Base
    The base config.
    Definition: hgemm_traits.h:133
    -
    TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
    The iterator to load A from shared memory.
    Definition: hgemm_traits.h:317
    -
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:302
    -
    SimplifiedGemmEpilogueTraits< GemmConfig, EpilogueFunctor_, Index_ > GemmEpilogueTraits
    The traits class for the epilogue.
    Definition: hgemm_traits.h:335
    +
    Definition: gemm_global_stream.h:52
    +
    Definition: gemm_traits.h:191
    +
    Definition: hgemm_traits.h:131
    +
    HgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
    The GEMM config for A.
    Definition: hgemm_traits.h:285
    +
    Defines data layouts of various matrix formats usable by TensorRef and other classes.
    Definition: matrix_traits.h:156
    +
    Definition: hgemm_traits.h:116
    +
    GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Base
    The base config.
    Definition: hgemm_traits.h:139
    +
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:399
    Defines iterators for efficiently loading and storing tiles to and from shared memory.
    -
    Definition: matrix_traits.h:36
    - -
    Definition: gemm_shared_stream.h:44
    +
    Definition: matrix_traits.h:159
    + +
    Definition: gemm_shared_stream.h:45
    +
    HgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
    Definition: hgemm_traits.h:313
    Defines a type for restructuring a tile.
    +
    ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
    The object to clear accumulators.
    Definition: hgemm_traits.h:347
    Specialization implementing multiply-add operation on half-precision floating point fragments...
    -
    Definition: gemm_traits.h:79
    +
    Definition: gemm_config.h:76
    +
    TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
    The iterator to load A from shared memory.
    Definition: hgemm_traits.h:332
    Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for...
    -
    Definition: gemm_traits.h:137
    -
    GemmSharedLoadTileBTraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
    The traits class to build the iterator to load from shared memory for B^N.
    Definition: hgemm_traits.h:249
    -
    Definition: matrix_traits.h:43
    -
    HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > GemmConfig
    The HGEMM config.
    Definition: hgemm_traits.h:274
    -
    Definition: hgemm_traits.h:190
    -
    GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
    The stream to load B from global memory to shared memory.
    Definition: hgemm_traits.h:310
    -
    GemmConfig::MultiplyAdd MultiplyAdd
    The functor to do the multiply-add in the main loop.
    Definition: hgemm_traits.h:330
    -
    HgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
    The GEMM config for B.
    Definition: hgemm_traits.h:278
    -
    Definition: gemm_traits.h:428
    +
    Definition: gemm_traits.h:52
    +
    Definition: matrix_traits.h:357
    +
    Definition: hgemm_traits.h:198
    +
    GemmConfig::MultiplyAdd MultiplyAdd
    The functor to do the multiply-add in the main loop.
    Definition: hgemm_traits.h:345
    +
    Definition: gemm_traits.h:349
    +
    HgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
    The GEMM config for B.
    Definition: hgemm_traits.h:287
    Definition: hgemm_global_tile.h:48
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    -
    Definition: gemm_epilogue_traits.h:300
    -
    GemmSharedLoadTileATraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
    The traits class to build the iterator to load from shared memory for A^T.
    Definition: hgemm_traits.h:184
    -
    HgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
    The GEMM config for A.
    Definition: hgemm_traits.h:276
    -
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:43
    -
    Definition: matrix_traits.h:36
    -
    Kind
    Definition: matrix_traits.h:36
    -
    HgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
    Definition: hgemm_traits.h:301
    - -
    Definition: hgemm_traits.h:271
    -
    HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for A^T.
    Definition: hgemm_traits.h:148
    +
    Definition: gemm_epilogue_traits.h:323
    +
    ReshapeThreads< VectorizedTile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:88
    +
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:44
    +
    Definition: matrix_traits.h:159
    +
    Kind
    Enumeration defining fundamental contiguous layouts.
    Definition: matrix_traits.h:159
    +
    GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
    The iterator to load A from global memory.
    Definition: hgemm_traits.h:291
    + +
    Definition: hgemm_traits.h:281
    +
    HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for A^T.
    Definition: hgemm_traits.h:154
    Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the threa...
    -
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:40
    +
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:51
    Definition: convert.h:38
    -
    Definition: matrix_traits.h:43
    +
    Definition: matrix_traits.h:357
    Implements a software-pipelined efficient GEMM.
    -
    ReshapeThreads< Tile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:87
    +
    GlobalLoadStream< GemmOperand::kB, GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
    The stream to load B from global memory to shared memory.
    Definition: hgemm_traits.h:325
    +
    SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
    The stream to load A from shared memory.
    Definition: hgemm_traits.h:334
    Defines structural properties of the GEMM epilogue.
    +
    TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
    The iterator to store B to shared memory.
    Definition: hgemm_traits.h:319
    +
    TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
    The iterator to store A to shared memory.
    Definition: hgemm_traits.h:300
    Definition: hgemm_swizzle.h:40
    Defines conversion operations among Fragments of different base type.
    -
    Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
    Definition: hgemm_traits.h:114
    +
    Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
    Definition: hgemm_traits.h:120
    Definition: hgemm_traits.h:57
    -
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:620
    -
    TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
    The iterator to store B to shared memory.
    Definition: hgemm_traits.h:307
    +
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:836
    diff --git a/docs/hierarchy.html b/docs/hierarchy.html index 25ba6bdab..865698e5a 100644 --- a/docs/hierarchy.html +++ b/docs/hierarchy.html @@ -73,7 +73,7 @@ $(function() {
    This inheritance list is sorted roughly, but not completely, alphabetically:
    -
    [detail level 123]

    @@ -135,7 +135,7 @@ Namespaces

    +
    [detail level 1234]
    @@ -94,316 +94,389 @@ $(function() { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
     Ccutlass::platform::aligned_chunk< Align >
     Ccutlass::platform::aligned_storage< Len, Align >Std::aligned_storage
     Ccutlass::AlignedStruct< kAlignment_ >
     Ccutlass::platform::alignment_of< ulong4 >
     Ccutlass::platform::alignment_of< ulonglong2 >
     Ccutlass::platform::alignment_of< ulonglong4 >
     Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >
     Ccutlass::ComputeOffsetFromShape< Shape_ >Compute the offset for the given coordinates in a cube
     Ccutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
     Ccutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >Compute the offset for the given coordinates in a cube with a depth of 1
     Ccutlass::ComputeOffsetFromStrides< Strides_ >Compute the offset for the given coordinates in a cube
     Ccutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
     Ccutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >Compute the offset for the given coordinates in a cube with a depth of 1
     Ccutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >Decompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_
     Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >Specialization for D=1 and C=1
     Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >Specialization for D=1
     Ccutlass::platform::conditional< B, T, F >Std::conditional (true specialization)
     Ccutlass::platform::conditional< false, T, F >Std::conditional (false specialization)
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIteratorA const iterator implementing Predicate Iterator Concept enabling sequential read-only access to prediactes
     Ccutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
     Ccutlass::Convert< InputFragment_, OutputFragment_ >
     Ccutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
     Ccutlass::Coord< N_ >Statically-sized array specifying Coords within a tensor
     Ccutlass::Coord< 4 >
     Ccutlass::Coord< Rank >
     Ccutlass::Copy< Fragment_ >
     Ccutlass::platform::default_delete< T >Default deleter
     Ccutlass::platform::default_delete< T[]>Partial specialization for deleting array types
     Ccutlass::divide_assert< Dividend, Divisor >
     Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
     Ccutlass::platform::enable_if< C, T >Std::enable_if (true specialization)
     Ccutlass::platform::enable_if< false, T >Std::enable_if (false specialization)
     Ccutlass::Extent< T >Returns the extent of a scalar or vector
     Ccutlass::Extent< Vector< T, Lanes > >Returns the number of lanes of a vector if need be
     Ccutlass::Extent< Vector< T, Lanes > const >Returns the number of lanes of a vector if need be
     Ccutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
     Ccutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >A template defining Fragment Iterator Concept
     Ccutlass::FragmentLoad< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     Ccutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     Ccutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     Ccutlass::gemm::FragmentMultiplyAdd< Scalar_ >
     Ccutlass::gemm::FragmentMultiplyAdd< half >
     Ccutlass::FragmentStore< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     Ccutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     Ccutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     Ccutlass::gemm::Gemm< GemmTraits_ >
     Ccutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
     Ccutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
     Ccutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
     Ccutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
     Ccutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
     Ccutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
     Ccutlass::gemm::GemmDesc< Scalar_, Index_ >
     Ccutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
     Ccutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
     Ccutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >
     Ccutlass::GemmOperandGemm operand - D = A * B + C
     Ccutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >Helper to describe attributes of GEMM matrix operands
     Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
     Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
     Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
     Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >
     Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
     Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
     Ccutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
     Ccutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
     Ccutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
     Ccutlass::gemm::GetExtent< kOperand_, Tile_ >
     Ccutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >
     Ccutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreamAssemble the global load streams for A/B
     Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
     Ccutlass::platform::greater< T >Std::greater
     Ccutlass::gemm::HgemmSwizzle< GlobalIterator_ >
     Ccutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
     Ccutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ >
     Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ >
     Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::IdentityDescribes identity elements
     Ccutlass::gemm::IdentityBlockSwizzle
     Ccutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
     Ccutlass::gemm::IgemmEpilogueScalar< int >
     Ccutlass::gemm::IgemmFloatToInt8Converter< kElements_ >
     Ccutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ >
     Ccutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >
     Ccutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ >
     Ccutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >
     Ccutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >
     Ccutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ >
     Ccutlass::gemm::IgemmSwizzle< GlobalIterator_ >
     Ccutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
     Ccutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ >
     Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ >
     Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::platform::integral_constant< value_t, V >Std::integral_constant
     Ccutlass::platform::integral_constant< bool, V >
     Ccutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
     Ccutlass::platform::integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>
     Ccutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
     Ccutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
     Ccutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
     Ccutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
     Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >Helper for std::is_base_of
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::IteratorAn iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates
     Ccutlass::IteratorAdvanceSpecifies dimension in which post-increment accesses advance
     Ccutlass::IteratorFragmentSpecifies whether iterator storage fragment consists of Scalar values or WMMA matrix
     Ccutlass::platform::less< T >Std::less
     Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >Functor to compute linear combination of fragments
     Ccutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >
     Ccutlass::Load< double, 2, Memory_, true, 16 >
     Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >
     Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >
     Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >
     Ccutlass::log2_down< N, CurrentVal, Count >
     Ccutlass::log2_down< N, 1, Count >
     Ccutlass::log2_up< N, CurrentVal, Count >
     Ccutlass::log2_up< N, 1, Count >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage
     Ccutlass::MatrixLayoutDescribes layouts of matrices
     Ccutlass::MemorySpaceEnum to specify which memory space data resides in
     Ccutlass::platform::nullptr_tStd::nullptr_t
     Ccutlass::platform::alignment_of< value_t >::pad
     Ccutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::ParamsThe params
     CParams
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ParamsThe params
     Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::ParamsThe params
     Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ParamsParameters to the iterator
     Ccutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::ParamsThe params
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::ParamsThe params
     Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::ParamsThe params
     Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::ParamsThe parameters
     Ccutlass::platform::plus< T >Platform::plus
     Ccutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >Statically sized array of bits implementing
     Ccutlass::PredicateVector< Base::Iterations::kW >
     Ccutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount >
     Ccutlass::gemm::ProjectOperand< operand, Kstrided >
     Ccutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >Project A operand - (0, K, M)
     Ccutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >Project B operand - (0, K, N)
     Ccutlass::gemm::ProjectOperand< GemmOperand::kC, true >Project C operand - (0, N, M)
     Ccutlass::gemm::ProjectOperand< GemmOperand::kD, true >Project D operand - (0, N, M)
     Ccutlass::platform::remove_const< T >Std::remove_const (non-const specialization)
     Ccutlass::platform::remove_const< const T >Std::remove_const (const specialization)
     Ccutlass::platform::remove_cv< T >Std::remove_cv
     Ccutlass::platform::remove_volatile< T >Std::remove_volatile (non-volatile specialization)
     Ccutlass::platform::remove_volatile< volatile T >Std::remove_volatile (volatile specialization)
     Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
     Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
     Ccutlass::ReshapeTile< Tile_, kAccessSize_, bool >
     Ccutlass::ReshapeTile< Tile_, kAccessSize_, true >
     Ccutlass::Shape< kD_, kH_, kW_, kC_ >A Shape implementing Layout Concept describing the dimensions of a cube
     Ccutlass::ShapeAdd< A_, B_ >
     Ccutlass::ShapeCount< Shape >Compute derived counted of a Layout Concept based class
     Ccutlass::ShapeDiv< A_, B_ >
     Ccutlass::ShapeMax< A_, B_ >
     Ccutlass::ShapeMin< A_, B_ >
     Ccutlass::ShapeMul< A_, B_ >
     Ccutlass::ShapeScale< A_, kScale_ >
     Ccutlass::ShapeStrides< Shape_ >
     Ccutlass::ShapeSub< A_, B_ >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreamAssemble the shared load stream for A/B
     Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
     Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorageThe shared storage
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorageThe shared memory to swizzle the data in the epilogue
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorageThe storage in shared memory
     Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorageThe storage in shared memory needed by that stream
     Ccutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
     Ccutlass::sqrt_est< N >
     Ccutlass::StorageType< kAlignment_ >
     Ccutlass::StorageType< 1 >
     Ccutlass::StorageType< 2 >
     Ccutlass::StorageType< 4 >
     Ccutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >
     Ccutlass::Store< double, 2, Memory_, true, 16 >
     Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >
     Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >
     Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorageThe shared memory storage to exchange data
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB >
     Ccutlass::TensorRef< Storage_, Rank_ >Structure modeling a pointer and stride into a tensor
     Ccutlass::TensorRef< T, 4 >
     Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >Template performing matrix multiply-add operation within a thread
     Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >Template performing matrix multiply-add operation within a thread
     Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >Template performing matrix multiply-add operation within a thread
     Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset
     Ccutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset
     Ccutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::TiledThreadOffset< ThreadShape >Basic thread offset function computed from a thread shape
     Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >Iterator for accessing a stripmined tile in memory
     Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
     Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
     Ccutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >A template defining Tile Traits Concept
     Ccutlass::TileTraitsContiguousMajor< Tile_, Threads >
     Ccutlass::TileTraitsStandard< Tile_, Threads >Chooses 'best' shape to enable warp raking along contiguous dimension if possible
     Ccutlass::TileTraitsStrideMajor< Tile_, Threads >
     Ccutlass::TileTraitsWarpRake< Tile_, Threads >Tiling in which warps rake across the contiguous dimension
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIteratorIterator that always returns true
     Ccutlass::TrivialPredicateTileAdapterAlways returns true predicate
     Ccutlass::platform::unique_ptr< T, Deleter >Std::unique_ptr
     Ccutlass::Vector< Scalar_, kLanes_ >
     Ccutlass::Vector< half, kLanes_ >
     Ccutlass::Vectorize< Element_, kLanes_ >
     Ccutlass::Vectorize< Element_, 1 >
     Ccutlass::VectorTraits< T >Traits describing properties of vectors and scalar-as-vectors
     Ccutlass::VectorTraits< Vector< T, Lanes > >Partial specialization for actual cutlass::Vector
     Ccutlass::VectorTraits< Vector< T, Lanes > const >Partial specialization for actual cutlass::Vector
     Ccutlass::bin1_t
     Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >
     Ccutlass::MatrixLayout::ColumnMajorMapping function for column-major matrices
     Ccutlass::MatrixLayout::ColumnMajorBlockLinear< BlockRows, BlockColumns >
     Ccutlass::gemm::ColumnMajorBlockSwizzle< groupCols, swDirection >
     Ccutlass::MatrixLayout::ColumnMajorInterleaved< Interleave >
     Ccutlass::platform::complex< T >
     Ccutlass::ComputeOffsetFromShape< Shape_ >Compute the offset for the given coordinates in a cube
     Ccutlass::ComputeOffsetFromStrides< Strides_ >Compute the offset for the given coordinates in a cube
     Ccutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >Decompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_
     Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >Specialization for D=1 and C=1
     Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >Specialization for D=1
     Ccutlass::platform::conditional< B, T, F >Std::conditional (true specialization)
     Ccutlass::platform::conditional< false, T, F >Std::conditional (false specialization)
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIteratorA const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates
     Ccutlass::TensorRefBatchStrided< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorConstant iterator over tensors implied by TensorRefBatchStrided
     Ccutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >::ConstIteratorTensorRefIterator over TensorRef objects in TensorRefArray
     Ccutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
     Ccutlass::MatrixLayout::ContiguousLayout
     Ccutlass::Convert< InputFragment_, OutputFragment_ >
     Ccutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
     Ccutlass::Coord< Rank_, Index_ >Statically-sized array specifying Coords within a tensor
     Ccutlass::Coord< 2, int >
     Ccutlass::Coord< 3 >
     Ccutlass::Coord< 4 >
     Ccutlass::Coord< 4, Index_ >
     Ccutlass::Coord< 4, int >
     Ccutlass::Coord< kStorageRank - 1 >
     Ccutlass::Copy< Fragment_ >
     CDebugType< T >
     CDebugValue< Value >
     Ccutlass::platform::default_delete< T >Default deleter
     Ccutlass::platform::default_delete< T[]>Partial specialization for deleting array types
     Ccutlass::divide_assert< Dividend, Divisor >
     Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
     Ccutlass::DumpType< T >
     Ccutlass::platform::enable_if< C, T >Std::enable_if (true specialization)
     Ccutlass::platform::enable_if< false, T >Std::enable_if (false specialization)
     Ccutlass::Extent< T >Returns the extent of a scalar or vector
     Ccutlass::Extent< Vector< T, Lanes > >Returns the number of lanes of a vector if need be
     Ccutlass::Extent< Vector< T, Lanes > const >Returns the number of lanes of a vector if need be
     Ccutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
     Ccutlass::FragmentElementTypeSpecifies whether iterator storage fragment consists of Scalar values or WMMA matrix
     Ccutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >A template defining Fragment Iterator Concept
     Ccutlass::gemm::FragmentMultiplyAdd< ScalarAlphaBeta_, ScalarAccum_, fragMul2 >
     Ccutlass::gemm::FragmentMultiplyAdd< half, half, true >
     Ccutlass::gemm::Gemm< GemmTraits_ >
     Ccutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_, kResidueSeparate_, kResidueInProlog_, kLaunchBounds_ >
     Ccutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< ThreadGemmShape_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2, false, false, false >
     Ccutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< ThreadGemmShape_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2, false, true, kLaunchBounds >
     Ccutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< ThreadGemmShape_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2, false, true, false >
     Ccutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< ThreadGemmShape_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2, false, true, false >
     Ccutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< ThreadGemmShape_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2, false, false, false >
     Ccutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, ThreadMultiplyAdd< ThreadGemmShape_, Shape< 1, 4, 8 >, ScalarA_, ScalarB_, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
     Ccutlass::gemm::GemmDesc< AType_, BType_, CType_, DType_, SType_, Index_ >GEMM problem description
     Ccutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadStreamD_, Iterations_, Delta_, Functor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadStreamD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadStreamD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
     Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
     Ccutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
     Ccutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >
     Ccutlass::GemmOperandGemm operand - D = A * B + C
     Ccutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >Helper to describe attributes of GEMM matrix operands
     Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
     Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
     Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
     Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >
     Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
     Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
     Ccutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
     Ccutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
     Ccutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
     Ccutlass::gemm::GetExtent< kOperand_, Tile_ >
     Ccutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >
     Ccutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >
     Ccutlass::gemm::GlobalLoadStream< Operand, LoadIterator_, StoreIterator_, Transformer_ >
     Ccutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >Collect the global load streams for multiplicands
     Ccutlass::platform::greater< T >Std::greater
     Ccutlass::gemm::HgemmSwizzle< GlobalIterator_ >
     Ccutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, ThreadGemmShape_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
     Ccutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ >
     Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ >
     Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::IdentityDescribes identity elements
     Ccutlass::gemm::IdentityBlockSwizzle
     Ccutlass::IdentityTensorMapFunc< Rank >
     Ccutlass::IdentityTensorMapFunc< Rank_ >
     Ccutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
     Ccutlass::gemm::IgemmEpilogueScalar< int >
     Ccutlass::gemm::IgemmFloatToInt8Converter< kElements_ >
     Ccutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ >
     Ccutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >
     Ccutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ >
     Ccutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >
     Ccutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >
     Ccutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ >
     Ccutlass::gemm::IgemmSwizzle< GlobalIterator_ >
     Ccutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_, Index_ >
     Ccutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_, Index_ >
     Ccutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, ThreadGemmShape_, Index_ >
     Ccutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ >
     Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ >
     Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     Ccutlass::int4_t
     Ccutlass::platform::integral_constant< value_t, V >Std::integral_constant
     Ccutlass::platform::integral_constant< bool, V >
     Ccutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
     Ccutlass::platform::integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>
     Ccutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
     Ccutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
     Ccutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
     Ccutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
     Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >Helper for std::is_base_of
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::IteratorAn iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates
     Ccutlass::IteratorAdvanceSpecifies dimension in which post-increment accesses advance
     Ccutlass::KernelLaunchConfigurationStructure containing the basic launch configuration of a CUDA kernel
     Ccutlass::gemm::Launch< Gemm, WithLaunchBounds >Partial specialization for launching the GEMM kernel with or without launch bounds
     Ccutlass::gemm::Launch< Gemm, false >Partial specialization for launching the GEMM kernel with or without launch bounds
     Ccutlass::platform::less< T >Std::less
     Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >Functor to compute linear combination of fragments
     Ccutlass::Load< Scalar_, kAccessSize, Memory_, kFragmentElementType, FragmentElement_, kStride, size >
     Ccutlass::Load< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 >
     Ccutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 >Partial specialization for 16b loads
     Ccutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 >
     Ccutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 >
     Ccutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 >
     Ccutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     Ccutlass::Load< Vector< bin1_t, 32 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     Ccutlass::Load< Vector< int4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     Ccutlass::Load< Vector< uint4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     Ccutlass::log2_down< N, CurrentVal, Count >
     Ccutlass::log2_down< N, 1, Count >
     Ccutlass::log2_up< N, CurrentVal, Count >
     Ccutlass::log2_up< N, 1, Count >
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage
     Ccutlass::MatrixLayoutDefines data layouts of various matrix formats usable by TensorRef and other classes
     Ccutlass::MatrixTransformTransformation applied to matrix operands
     Ccutlass::Max< A, B >
     Ccutlass::MemorySpaceEnum to specify which memory space data resides in
     Ccutlass::Min< A, B >
     Ccutlass::platform::nullptr_tStd::nullptr_t
     Ccutlass::platform::alignment_of< value_t >::pad
     Ccutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::ParamsThe parameters
     Ccutlass::gemm::GlobalLoadStream< Operand, LoadIterator_, StoreIterator_, Transformer_ >::ParamsThe params
     Ccutlass::gemm::SharedStreamPair< StreamA_, StreamB_ >::ParamsParameters object passed to load iterators
     Ccutlass::ZipTileIterator< First_, Second_ >::ParamsParams object
     Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::ParamsThe parameters
     Ccutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::ParamsParameters object
     Ccutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::ParamsThe params
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadStreamD_, Iterations_, Delta_, Functor_, Index_ >::ParamsThe params
     Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ >::ParamsParameters to the iterator
     Ccutlass::TileLoadStream< Iterator_, Transformer_ >::ParamsParameters object used to construct generic load stream
     Ccutlass::TileStoreStream< Iterator_, Transformer_ >::ParamsParameters used to construct the stream
     Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::ParamsThe params
     Ccutlass::platform::plus< T >Platform::plus
     Ccutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
     Ccutlass::TileLoadStream< Iterator_, Transformer_ >::PredicateVectorEmpty predicate vector struct
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >Statically sized array of bits implementing
     Ccutlass::TileStoreStream< Iterator_, Transformer_ >::PredicateVectorEmpty predicate vector struct
     Ccutlass::PredicateVector< Base::Iterations::kW >
     Ccutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount >
     Ccutlass::gemm::ProjectOperand< operand, Kstrided >
     Ccutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >Project A operand - (0, K, M)
     Ccutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >Project B operand - (0, K, N)
     Ccutlass::gemm::ProjectOperand< GemmOperand::kC, true >Project C operand - (0, N, M)
     Ccutlass::gemm::ProjectOperand< GemmOperand::kD, true >Project D operand - (0, N, M)
     Ccutlass::RegularTilePredicateFunctor< Delta_ >Functor computing a predicate given the logical position of an access
     Ccutlass::platform::remove_const< T >Std::remove_const (non-const specialization)
     Ccutlass::platform::remove_const< const T >Std::remove_const (const specialization)
     Ccutlass::platform::remove_cv< T >Std::remove_cv
     Ccutlass::platform::remove_volatile< T >Std::remove_volatile (non-volatile specialization)
     Ccutlass::platform::remove_volatile< volatile T >Std::remove_volatile (volatile specialization)
     Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
     Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
     Ccutlass::ReshapeTile< Tile_, kAccessSize_, bool >
     Ccutlass::ReshapeTile< Tile_, kAccessSize_, true >
     Ccutlass::MatrixLayout::RowMajorMapping function for row-major matrices
     Ccutlass::MatrixLayout::RowMajorBlockLinear< BlockRows, BlockColumns >
     Ccutlass::gemm::RowMajorBlockSwizzle< groupRows, swDirection >
     Ccutlass::MatrixLayout::RowMajorInterleaved< Interleave >
     Ccutlass::ScalarIO< T >Helper to enable formatted printing of CUTLASS scalar types to an ostream
     Ccutlass::detail::ScalarOrPointer< Scalar_ >
     Ccutlass::detail::ScalarOrPointer< Scalar >
     Ccutlass::Shape< kD_, kH_, kW_, kC_ >A Shape implementing Layout Concept describing the dimensions of a cube
     Ccutlass::ShapeAdd< A_, B_ >
     Ccutlass::ShapeCount< Shape >Compute derived counted of a Layout Concept based class
     Ccutlass::ShapeDiv< A_, B_ >
     Ccutlass::ShapeDivCeiling< A_, B_ >
     Ccutlass::ShapeMax< A_, B_ >
     Ccutlass::ShapeMin< A_, B_ >
     Ccutlass::ShapeMul< A_, B_ >
     Ccutlass::ShapeScale< A_, kScale_ >
     Ccutlass::ShapeStrides< Shape_, elementsPerAccess >
     Ccutlass::ShapeSub< A_, B_ >
     Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadStreamD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorageThe shared memory to swizzle the data in the epilogue
     Ccutlass::gemm::GlobalLoadStreamPair< StreamA_, StreamB_, kResidueInProlog_ >::SharedStorageDefines a structure containing shared storage for each pair
     Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorageThe storage in shared memory
     Ccutlass::gemm::GlobalLoadStream< Operand, LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage
     Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorageThe shared storage
     Ccutlass::gemm::SharedStreamPair< StreamA_, StreamB_ >Collect the global load streams for multiplicands
     Ccutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
     Ccutlass::sqrt_est< N >
     Ccutlass::StorageType< alignment >
     Ccutlass::StorageType< 1 >
     Ccutlass::StorageType< 2 >
     Ccutlass::StorageType< 4 >
     Ccutlass::StorageType< kAlignment_ >
     Ccutlass::StorageType< sizeof(Scalar)>
     Ccutlass::Store< Scalar_, kAccessSize, Memory_, kFragmentElementType, FragmentElement_, kStride, size >
     Ccutlass::Store< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 >
     Ccutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 >
     Ccutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 >
     Ccutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 >
     Ccutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 >
     Ccutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadStreamD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorageThe shared memory storage to exchange data
     Ccutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >::StrideVector
     Ccutlass::gemm::swizzleDirection
     Ccutlass::TensorRef< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
     Ccutlass::TensorRef< AType const, 2 >
     Ccutlass::TensorRef< BType const, 2 >
     Ccutlass::TensorRef< CType const, 2 >
     Ccutlass::TensorRef< DType, 2 >
     Ccutlass::TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >Specialization for rank=1 case with no internal StrideVector
     Ccutlass::TensorRefArray< Storage_, Rank_, MapFunc_, StorageRank_, Index_, LongIndex_ >
     Ccutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_, kLayout_ >Template performing matrix multiply-add operation within a thread
     Ccutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >Template performing matrix multiply-add operation within a thread
     Ccutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, half >Template performing matrix multiply-add operation within a thread
     Ccutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >Template performing matrix multiply-add operation within a thread
     Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset
     Ccutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset
     Ccutlass::gemm::IgemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
     Ccutlass::TileAllocation< Scalar_, Shape_ >Class for storing a tile in memory and accessing it through a tensor ref
     Ccutlass::TiledThreadOffset< ThreadShape >Basic thread offset function computed from a thread shape
     Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, FragmentElementType_, Skew_ >Iterator for accessing a stripmined tile in memory
     Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, FragmentElementType::kScalar, Shape< 0, 0, 0, 0 > >
     Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
     Ccutlass::TileLoadStream< Iterator_, Transformer_ >Generic stream for loading and transforming fragments
     Ccutlass::TileStoreStream< Iterator_, Transformer_ >Generic stream for transforming and storing fragments
     Ccutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_, AccessSize >A template defining Tile Traits Concept
     Ccutlass::TileTraitsContiguousMajor< Tile_, Threads >
     Ccutlass::TileTraitsStandard< Tile_, Threads >Chooses 'best' shape to enable warp raking along contiguous dimension if possible
     Ccutlass::TileTraitsStrideMajor< Tile_, Threads >
     Ccutlass::TileTraitsWarpRake< Tile_, Threads >Tiling in which warps rake across the contiguous dimension
     Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIteratorIterator that always returns true
     Ccutlass::TrivialPredicateTileAdapterAlways returns true predicate
     Ccutlass::uint4_t
     Ccutlass::platform::unique_ptr< T, Deleter >Std::unique_ptr
     Ccutlass::Vector< Scalar_, kLanes_ >
     Ccutlass::Vector< bin1_t, kLanes_ >Vector definition for 1-bit binary datatype
     Ccutlass::Vector< half, 1 >
     Ccutlass::Vector< half, kLanes_ >
     Ccutlass::Vector< int4_t, kLanes_ >Vector definition for 4-bit signed integer datatype
     Ccutlass::Vector< uint4_t, kLanes_ >Vector definition for 4-bit unsigned integer datatype
     Ccutlass::Vectorize< Element_, kLanes_ >
     Ccutlass::Vectorize< Vector< bin1_t, 32 >, kLanes_ >
     Ccutlass::Vectorize< Vector< int4_t, 8 >, kLanes_ >
     Ccutlass::Vectorize< Vector< uint4_t, 8 >, kLanes_ >
     Ccutlass::VectorTraits< T >Traits describing properties of vectors and scalar-as-vectors
     Ccutlass::VectorTraits< Vector< T, Lanes > >Partial specialization for actual cutlass::Vector
     Ccutlass::VectorTraits< Vector< T, Lanes > const >Partial specialization for actual cutlass::Vector
     Ccutlass::ZipConvert< First_, Second_ >Zips two convert operations
     Ccutlass::ZipFragment< First_, Second_ >A template defining Fragment Concept
     Ccutlass::ZipTensorRef< First_, Second_ >
     Ccutlass::ZipTileAllocation< First_, Second_ >Manages a pair of tile allocations as if they are one allocation
     Ccutlass::ZipTileIterator< First_, Second_ >Constructs an iterator from a pair of iterators
    diff --git a/docs/igemm__epilogue_8h.html b/docs/igemm__epilogue_8h.html index 9b5e5ccf0..f7332de21 100644 --- a/docs/igemm__epilogue_8h.html +++ b/docs/igemm__epilogue_8h.html @@ -82,13 +82,13 @@ $(function() {

    Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats. More...

    -
    #include <cutlass/convert.h>
    -#include <cutlass/fragment.h>
    -#include <cutlass/gemm/gemm_global_stream.h>
    -#include <cutlass/gemm/gemm_shared_stream.h>
    -#include <cutlass/gemm/igemm_global_tile.h>
    -#include <cutlass/reshape_tile.h>
    -#include <cutlass/tile_iterator.h>
    +

    Go to the source code of this file.

    @@ -127,7 +127,7 @@ Namespaces diff --git a/docs/igemm__epilogue_8h_source.html b/docs/igemm__epilogue_8h_source.html index bfef820ae..43f9f1583 100644 --- a/docs/igemm__epilogue_8h_source.html +++ b/docs/igemm__epilogue_8h_source.html @@ -76,67 +76,66 @@ $(function() {
    igemm_epilogue.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/convert.h>
    32 #include <cutlass/fragment.h>
    36 #include <cutlass/reshape_tile.h>
    37 #include <cutlass/tile_iterator.h>
    38 
    39 namespace cutlass {
    40 namespace gemm {
    41 
    43 
    44 template <int kElements_>
    50 
    51  // We are packing 4 floats into int32 registers so we need kElements to be multiple of 4.
    52  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
    53 
    55  CUTLASS_DEVICE IgemmFloatToInt8Converter() {}
    56 
    58  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
    59  transform(src, 0, dst);
    60  }
    61 
    63  template <typename Fragment_>
    64  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
    65  // The inputs.
    66  float4 const* src_f4 = reinterpret_cast<float4 const*>(&src[0]);
    67  // The outputs.
    68  int* dst_int = reinterpret_cast<int*>(&dst[0]);
    69 
    70  // Iterate over the floats and pack them together to produce ints.
    71  for (int i = 0; i < kElements_ / 4; ++i) {
    72  // Read the float4.
    73  float4 f4 = src_f4[i];
    74 
    75  // Clamp the 4 elements of the floats to the [-128, +127] range.
    76  float x = fmaxf(-128.f, fminf(127.f, f4.x));
    77  float y = fmaxf(-128.f, fminf(127.f, f4.y));
    78  float z = fmaxf(-128.f, fminf(127.f, f4.z));
    79  float w = fmaxf(-128.f, fminf(127.f, f4.w));
    80 
    81  // Convert to integers.
    82  int ix = (int)x;
    83  int iy = (int)y;
    84  int iz = (int)z;
    85  int iw = (int)w;
    86 
    87  // Extract the lower bytes to build an int32 with 4 int8.
    88  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(ix) : "r"(iy));
    89  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(iz) : "r"(iw));
    90  asm volatile("prmt.b32 %0, %0, %1, 0x5410;" : "+r"(ix) : "r"(iz));
    91 
    92  // Store the int.
    93  dst_int[i] = ix;
    94  }
    95  }
    96 };
    97 
    99 
    100 template <typename InputScalar_, typename OutputFragment_>
    103 };
    104 
    105 template <int kElements_>
    106 struct IgemmGlobalStoreTransformer<float, Fragment<int8_t, kElements_> > {
    108 };
    109 
    111 
    112 template <int kElements_>
    118 
    119  // We are unpacking 4 int8s from int32.
    120  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
    121 
    123  CUTLASS_DEVICE IgemmInt8ToFloatConverter() {}
    124 
    126  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
    127  transform(src, 0, dst);
    128  }
    129 
    131  template <typename Fragment_>
    132  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
    133  // The inputs.
    134  int const* src_int = reinterpret_cast<int const*>(&src[0]);
    135  // The outputs.
    136  float4* dst_f4 = reinterpret_cast<float4*>(&dst[0]);
    137 
    138  // Iterate over the int8 and unpack them together to produce floats.
    139  for (int i = 0; i < kElements_ / 4; ++i) {
    140  // Read the int.
    141  int ix, iy, iz, iw = src_int[i];
    142 
    143  // Extract the 4 bytes.
    144  asm volatile("prmt.b32 %0, 0x0, %1, 0x4440;" : "=r"(ix) : "r"(iw));
    145  asm volatile("prmt.b32 %0, 0x0, %1, 0x4441;" : "=r"(iy) : "r"(iw));
    146  asm volatile("prmt.b32 %0, 0x0, %1, 0x4442;" : "=r"(iz) : "r"(iw));
    147  asm volatile("prmt.b32 %0, 0x0, %1, 0x4443;" : "=r"(iw) : "r"(iw));
    148 
    149  // The floats.
    150  float fx, fy, fz, fw;
    151 
    152  // Convert to floats (make sure we generate I2F.F32.S8).
    153  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fx) : "r"(ix));
    154  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fy) : "r"(iy));
    155  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fz) : "r"(iz));
    156  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fw) : "r"(iw));
    157 
    158  // Store the float4.
    159  dst_f4[i] = make_float4(fx, fy, fz, fw);
    160  }
    161  }
    162 };
    163 
    165 
    166 template <typename InputFragment_, typename OutputScalar_>
    169 };
    170 
    171 template <int kElements_>
    172 struct IgemmGlobalLoadTransformer<Fragment<int8_t, kElements_>, float> {
    174 };
    175 
    177 
    178 template <typename InputScalar_, typename OutputFragment_>
    181 };
    182 
    184 
    185 template <typename IgemmConfig_, typename EpilogueFunctor_, typename Index_>
    187  : public GemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_> {
    191  typedef IgemmConfig_ IgemmConfig;
    192 
    194  typedef typename Base::Scalar Scalar;
    196  typedef typename Base::Iterations Iterations;
    198  typedef typename Base::Delta Delta;
    199 
    207  typedef
    209 
    217  typedef
    219 
    232  SharedStoreFragmentD>::Transformer
    242 };
    243 
    245 
    246 template <
    248  typename IgemmConfig_,
    250  typename EpilogueFunctor_,
    252  typename Index_ = int,
    256  // The output tile.
    257  typename IgemmConfig_::OutputTile,
    258  // The accumulators.
    259  typename IgemmConfig_::Accumulators,
    260  // The global iterator for C.
    261  typename Helper_::GlobalLoadIteratorC,
    262  // The transformer for C.
    263  typename Helper_::GlobalTransformerC,
    264  // The transformer for D.
    265  typename Helper_::GlobalTransformerD,
    266  // The global iterator for D.
    267  typename Helper_::GlobalStoreIteratorD,
    268  // The iterator to store D to shared memory.
    269  typename Helper_::SharedStoreIteratorD,
    270  // The shared store transformer for D.
    271  typename Helper_::SharedStoreTransformerD,
    272  // The iterator to load D from shared memory.
    273  typename Helper_::SharedLoadIteratorD,
    274  // The iterations.
    275  typename Helper_::Iterations,
    276  // The strides between iterations.
    277  typename Helper_::Delta,
    278  // The functor to be used in the epilogue.
    279  EpilogueFunctor_,
    280  // The index.
    281  Index_> {
    283  static bool const kInt8Output =
    285 };
    286 
    288 
    289 template <typename GemmEpilogueTraits_, bool = GemmEpilogueTraits_::kInt8Output>
    290 struct IgemmEpilogue : public GemmEpilogue<GemmEpilogueTraits_> {
    293 
    295  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
    296  typename Base::SharedStorage& shared_storage_,
    297  typename Base::Index m_,
    298  typename Base::Index n_)
    299  : Base(params_, shared_storage_, m_, n_) {}
    300 };
    301 
    303 
    304 template <typename GemmEpilogueTraits_>
    305 struct IgemmEpilogue<GemmEpilogueTraits_, true> : public GemmEpilogue<GemmEpilogueTraits_> {
    308 
    310  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
    311  typename Base::SharedStorage& shared_storage_,
    312  typename Base::Index m_,
    313  typename Base::Index n_)
    314  : Base(params_, shared_storage_, m_, n_) {}
    315 };
    316 
    318 
    319 } // namespace gemm
    320 } // namespace cutlass
    Definition: gemm_global_tile.h:116
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include "cutlass/convert.h"
    32 #include "cutlass/fragment.h"
    36 #include "cutlass/reshape_tile.h"
    37 #include "cutlass/tile_iterator.h"
    38 
    39 namespace cutlass {
    40 namespace gemm {
    41 
    43 
    44 template <int kElements_>
    50 
    51  // We are packing 4 floats into int32 registers so we need kElements to be multiple of 4.
    52  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
    53 
    55  CUTLASS_DEVICE IgemmFloatToInt8Converter() {}
    56 
    58  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
    59  transform(src, 0, dst);
    60  }
    61 
    63  template <typename Fragment_>
    64  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
    65  // The inputs.
    66  float4 const* src_f4 = reinterpret_cast<float4 const*>(&src[0]);
    67  // The outputs.
    68  int* dst_int = reinterpret_cast<int*>(&dst[0]);
    69 
    70  // Iterate over the floats and pack them together to produce ints.
    71  for (int i = 0; i < kElements_ / 4; ++i) {
    72  // Read the float4.
    73  float4 f4 = src_f4[i];
    74 
    75  // Clamp the 4 elements of the floats to the [-128, +127] range.
    76  float x = fmaxf(-128.f, fminf(127.f, f4.x));
    77  float y = fmaxf(-128.f, fminf(127.f, f4.y));
    78  float z = fmaxf(-128.f, fminf(127.f, f4.z));
    79  float w = fmaxf(-128.f, fminf(127.f, f4.w));
    80 
    81  // Convert to integers.
    82  int ix = (int)x;
    83  int iy = (int)y;
    84  int iz = (int)z;
    85  int iw = (int)w;
    86 
    87  // Extract the lower bytes to build an int32 with 4 int8.
    88  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(ix) : "r"(iy));
    89  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(iz) : "r"(iw));
    90  asm volatile("prmt.b32 %0, %0, %1, 0x5410;" : "+r"(ix) : "r"(iz));
    91 
    92  // Store the int.
    93  dst_int[i] = ix;
    94  }
    95  }
    96 };
    97 
    99 
    100 template <typename InputScalar_, typename OutputFragment_>
    103 };
    104 
    105 template <int kElements_>
    106 struct IgemmGlobalStoreTransformer<float, Fragment<int8_t, kElements_> > {
    108 };
    109 
    111 
    112 template <int kElements_>
    118 
    119  // We are unpacking 4 int8s from int32.
    120  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
    121 
    123  CUTLASS_DEVICE IgemmInt8ToFloatConverter() {}
    124 
    126  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
    127  transform(src, 0, dst);
    128  }
    129 
    131  template <typename Fragment_>
    132  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
    133  // The inputs.
    134  int const* src_int = reinterpret_cast<int const*>(&src[0]);
    135  // The outputs.
    136  float4* dst_f4 = reinterpret_cast<float4*>(&dst[0]);
    137 
    138  // Iterate over the int8 and unpack them together to produce floats.
    139  for (int i = 0; i < kElements_ / 4; ++i) {
    140  // Read the int.
    141  int ix, iy, iz, iw = src_int[i];
    142 
    143  // Extract the 4 bytes.
    144  asm volatile("prmt.b32 %0, 0x0, %1, 0x4440;" : "=r"(ix) : "r"(iw));
    145  asm volatile("prmt.b32 %0, 0x0, %1, 0x4441;" : "=r"(iy) : "r"(iw));
    146  asm volatile("prmt.b32 %0, 0x0, %1, 0x4442;" : "=r"(iz) : "r"(iw));
    147  asm volatile("prmt.b32 %0, 0x0, %1, 0x4443;" : "=r"(iw) : "r"(iw));
    148 
    149  // The floats.
    150  float fx, fy, fz, fw;
    151 
    152  // Convert to floats (make sure we generate I2F.F32.S8).
    153  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fx) : "r"(ix));
    154  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fy) : "r"(iy));
    155  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fz) : "r"(iz));
    156  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fw) : "r"(iw));
    157 
    158  // Store the float4.
    159  dst_f4[i] = make_float4(fx, fy, fz, fw);
    160  }
    161  }
    162 };
    163 
    165 
    166 template <typename InputFragment_, typename OutputScalar_>
    169 };
    170 
    171 template <int kElements_>
    172 struct IgemmGlobalLoadTransformer<Fragment<int8_t, kElements_>, float> {
    174 };
    175 
    177 
    178 template <typename InputScalar_, typename OutputFragment_>
    181 };
    182 
    184 
    185 template <typename IgemmConfig_, typename EpilogueFunctor_, typename Index_>
    187  : public GemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_> {
    191  typedef IgemmConfig_ IgemmConfig;
    192 
    194  typedef typename Base::Scalar Scalar;
    196  typedef typename Base::Iterations Iterations;
    198  typedef typename Base::Delta Delta;
    199 
    207  typedef
    209 
    217  typedef
    219 
    232  SharedStoreFragmentD>::Transformer
    242 };
    243 
    245 
    246 template <
    248  typename IgemmConfig_,
    250  typename EpilogueFunctor_,
    252  typename Index_ = int,
    256  // The output tile.
    257  typename IgemmConfig_::OutputTile,
    258  // The accumulators.
    259  typename IgemmConfig_::Accumulators,
    260  // The global iterator for C.
    261  typename Helper_::GlobalLoadIteratorC,
    262  // The transformer for C.
    263  typename Helper_::GlobalTransformerC,
    264  // The transformer for D.
    265  typename Helper_::GlobalTransformerD,
    266  // The global iterator for D.
    267  typename Helper_::GlobalStoreIteratorD,
    268  // The iterator to store D to shared memory.
    269  typename Helper_::SharedStoreIteratorD,
    270  // The shared store transformer for D.
    271  typename Helper_::SharedStoreTransformerD,
    272  // The stream to load D from shared memory.
    273  typename Helper_::SharedLoadStreamD,
    274  // The iterations.
    275  typename Helper_::Iterations,
    276  // The strides between iterations.
    277  typename Helper_::Delta,
    278  // The functor to be used in the epilogue.
    279  EpilogueFunctor_,
    280  // The index.
    281  Index_> {
    283  static bool const kInt8Output =
    285 };
    286 
    288 
    289 template <typename GemmEpilogueTraits_, bool = GemmEpilogueTraits_::kInt8Output>
    290 struct IgemmEpilogue : public GemmEpilogue<GemmEpilogueTraits_> {
    293 
    295  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
    296  typename Base::SharedStorage& shared_storage_,
    297  Coord<3> const& _problem_size)
    298  : Base(params_, shared_storage_, _problem_size) {}
    299 };
    300 
    302 
    303 template <typename GemmEpilogueTraits_>
    304 struct IgemmEpilogue<GemmEpilogueTraits_, true> : public GemmEpilogue<GemmEpilogueTraits_> {
    307 
    309  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
    310  typename Base::SharedStorage& shared_storage_,
    311  Coord<3> const& _problem_size)
    312  : Base(params_, shared_storage_, _problem_size) {}
    313 };
    314 
    316 
    317 } // namespace gemm
    318 } // namespace cutlass
    Definition: gemm_global_tile.h:120
    Definition: igemm_epilogue.h:255
    -
    Definition: load_store.h:42
    +
    Definition: load_store.h:41
    Base::Delta Delta
    The iterations strides.
    Definition: igemm_epilogue.h:198
    -
    Base::Fragment Fragment
    Fragment definition.
    Definition: tile_iterator.h:682
    Base::SharedStoreTileTraits SharedStoreTileTraits
    The traits class for the shared iterator to store D to shared memory.
    Definition: igemm_epilogue.h:221
    IgemmGlobalStoreTransformer< Scalar, GlobalFragmentD >::Transformer GlobalTransformerD
    The transformer from accumulators to shared memory fragments.
    Definition: igemm_epilogue.h:218
    Definition: convert.h:33
    Base::SharedLoadTileTraits SharedLoadTileTraits
    The traits class for the shared iterator to load D from shared memory.
    Definition: igemm_epilogue.h:235
    TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
    The shared iterator to load D from shared memory.
    Definition: igemm_epilogue.h:241
    -
    Definition: gemm_epilogue_traits.h:171
    +
    Definition: gemm_epilogue_traits.h:186
    GemmEpilogue< GemmEpilogueTraits_ > Base
    The base class.
    Definition: igemm_epilogue.h:292
    -
    Traits::Params Params
    The params.
    Definition: gemm_epilogue.h:57
    -
    Definition: gemm_epilogue.h:53
    +
    Traits::Params Params
    The params.
    Definition: gemm_epilogue.h:46
    +
    Definition: gemm_epilogue.h:42
    Definition: igemm_epilogue.h:167
    -
    std::is_same (false specialization)
    Definition: platform.h:412
    +
    std::is_same (false specialization)
    Definition: platform.h:420
    Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
    CUTLASS_DEVICE IgemmInt8ToFloatConverter()
    Ctor.
    Definition: igemm_epilogue.h:123
    SharedStoreIteratorD::Fragment SharedStoreFragmentD
    The fragment that needs to be passed to that store iterator.
    Definition: igemm_epilogue.h:229
    -
    EpilogueFunctor_::Scalar Scalar
    The scalar.
    Definition: gemm_epilogue_traits.h:173
    +
    EpilogueFunctor_::Scalar Scalar
    The scalar.
    Definition: gemm_epilogue_traits.h:188
    Definition: igemm_epilogue.h:186
    -
    Definition: load_store.h:43
    +
    Definition: load_store.h:42
    Fragment< int8_t, kElements_ > InputFragment
    The input fragment.
    Definition: igemm_epilogue.h:115
    +
    Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
    The fragment.
    Definition: tile_iterator.h:196
    Definition: igemm_epilogue.h:290
    Definition: igemm_epilogue.h:45
    CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
    Transform a fragment.
    Definition: igemm_epilogue.h:64
    -
    Traits::SharedStorage SharedStorage
    The shared storage.
    Definition: gemm_epilogue.h:59
    +
    Traits::SharedStorage SharedStorage
    The shared storage.
    Definition: gemm_epilogue.h:48
    A template defining Fragment Concept.
    Definition: fragment.h:99
    -
    Definition: tile_iterator.h:62
    +
    Definition: tile_iterator.h:65
    CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
    Transform a fragment.
    Definition: igemm_epilogue.h:126
    Base::Scalar Scalar
    The scalar type of the epilogue.
    Definition: igemm_epilogue.h:194
    +
    CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, Coord< 3 > const &_problem_size)
    Ctor.
    Definition: igemm_epilogue.h:295
    GlobalLoadIteratorC::Fragment GlobalFragmentC
    The fragment that needs to be produced by the load iterator.
    Definition: igemm_epilogue.h:205
    +
    Base::Fragment Fragment
    Fragment definition.
    Definition: tile_iterator.h:901
    CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
    Transform a fragment.
    Definition: igemm_epilogue.h:58
    Fragment< int8_t, kElements_ > OutputFragment
    The output fragment.
    Definition: igemm_epilogue.h:49
    GemmGlobalIteratorCd< GlobalStoreTileTraits > GlobalStoreIteratorD
    The iterator to store to shared memory.
    Definition: igemm_epilogue.h:213
    IgemmSharedStoreTransformer< typename IgemmConfig::Accumulators::Element, SharedStoreFragmentD >::Transformer SharedStoreTransformerD
    The transformer from accumulators to shared memory fragments.
    Definition: igemm_epilogue.h:233
    static bool const kInt8Output
    Do we output in int8?
    Definition: igemm_epilogue.h:283
    -
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:302
    +
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:399
    Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
    Definition: igemm_epilogue.h:180
    -
    GemmEpilogue< GemmEpilogueTraits_ > Base
    The base class.
    Definition: igemm_epilogue.h:307
    +
    GemmEpilogue< GemmEpilogueTraits_ > Base
    The base class.
    Definition: igemm_epilogue.h:306
    Defines a type for restructuring a tile.
    Base::GlobalLoadTileTraits GlobalLoadTileTraits
    The traits class for the iterator.
    Definition: igemm_epilogue.h:201
    Fragment< float, kElements_ > OutputFragment
    The output fragment.
    Definition: igemm_epilogue.h:117
    GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Base
    The base class.
    Definition: igemm_epilogue.h:189
    -
    CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
    Ctor.
    Definition: igemm_epilogue.h:295
    -
    Definition: gemm_shared_tile.h:335
    -
    Traits::Index Index
    The index.
    Definition: gemm_epilogue.h:93
    +
    Definition: gemm_shared_tile.h:339
    GlobalStoreIteratorD::Fragment GlobalFragmentD
    The fragment that needs to be passed to that store iterator.
    Definition: igemm_epilogue.h:215
    GemmGlobalIteratorCd< GlobalLoadTileTraits > GlobalLoadIteratorC
    The iterator to store to shared memory.
    Definition: igemm_epilogue.h:203
    -
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    #define static_assert(__e, __m)
    Definition: platform.h:153
    IgemmConfig_ IgemmConfig
    The config.
    Definition: igemm_epilogue.h:191
    -
    CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
    Ctor.
    Definition: igemm_epilogue.h:310
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    CUTLASS_DEVICE IgemmFloatToInt8Converter()
    Ctor.
    Definition: igemm_epilogue.h:55
    Element_ Element
    The element.
    Definition: fragment.h:108
    Fragment< float, kElements_ > InputFragment
    The input fragment.
    Definition: igemm_epilogue.h:47
    +
    Definition: gemm_epilogue_traits.h:70
    -
    Definition: gemm_global_tile.h:348
    +
    Definition: gemm_global_tile.h:396
    Definition: igemm_epilogue.h:179
    Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
    -
    Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
    The fragment.
    Definition: tile_iterator.h:154
    Definition: convert.h:38
    IgemmFloatToInt8Converter< kElements_ > Transformer
    Definition: igemm_epilogue.h:107
    Base::Iterations Iterations
    The iterations.
    Definition: igemm_epilogue.h:196
    @@ -144,7 +143,7 @@ $(function() {
    Base::GlobalStoreTileTraits GlobalStoreTileTraits
    The traits class for the iterator.
    Definition: igemm_epilogue.h:211
    Convert< InputFragment_, Fragment< OutputScalar_, InputFragment_::kElements > > Transformer
    Definition: igemm_epilogue.h:168
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    -
    platform::remove_const< Scalar_ >::type Scalar
    The scalar.
    Definition: gemm_shared_tile.h:266
    +
    platform::remove_const< Scalar_ >::type Scalar
    The scalar.
    Definition: gemm_shared_tile.h:272
    CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
    Transform a fragment.
    Definition: igemm_epilogue.h:132
    Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
    Definition: igemm_epilogue.h:102
    Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
    @@ -153,14 +152,15 @@ $(function() {
    IgemmInt8ToFloatConverter< kElements_ > Transformer
    Definition: igemm_epilogue.h:173
    Defines conversion operations among Fragments of different base type.
    Definition: igemm_epilogue.h:113
    -
    platform::remove_const< Scalar_ >::type Scalar
    The scalar.
    Definition: gemm_shared_tile.h:337
    +
    platform::remove_const< Scalar_ >::type Scalar
    The scalar.
    Definition: gemm_shared_tile.h:341
    +
    CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, Coord< 3 > const &_problem_size)
    Ctor.
    Definition: igemm_epilogue.h:309
    Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load ea...
    -
    Definition: gemm_shared_tile.h:264
    -
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:620
    +
    Definition: gemm_shared_tile.h:270
    +
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:836
    diff --git a/docs/igemm__global__tile_8h.html b/docs/igemm__global__tile_8h.html index d6a680168..4b5ee6d7c 100644 --- a/docs/igemm__global__tile_8h.html +++ b/docs/igemm__global__tile_8h.html @@ -82,18 +82,20 @@ $(function() {

    Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory. More...

    -
    - + - - + + + +

    Classes

    struct  cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    struct  cutlass::gemm::IgemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
     
    struct  cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
    struct  cutlass::gemm::IgemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    struct  cutlass::gemm::IgemmGlobalIteratorAb< TileTraits_, Index_ >
     
    diff --git a/docs/igemm__global__tile_8h_source.html b/docs/igemm__global__tile_8h_source.html index df086169d..04428a68e 100644 --- a/docs/igemm__global__tile_8h_source.html +++ b/docs/igemm__global__tile_8h_source.html @@ -76,33 +76,46 @@ $(function() {
    igemm_global_tile.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    33 #pragma once
    34 
    35 #include <cutlass/coord.h>
    37 #include <cutlass/matrix_traits.h>
    38 
    39 namespace cutlass {
    40 namespace gemm {
    41 
    43 
    44 template <GemmOperand::Kind kOperand_,
    45  MatrixLayout::Kind kLayout_,
    46  typename Scalar_,
    47  typename Tile_,
    48  typename Threads_,
    49  int kAccessSize_>
    51  // Which GEMM operand?
    52  kOperand_,
    53  // The layout.
    54  kLayout_,
    55  // The scalar.
    56  Scalar_,
    57  // The tile.
    58  Tile_,
    59  // The threads.
    60  Threads_,
    61  // The number of scalars per LDG/STG.
    62  kAccessSize_> {
    66  typedef typename Base::Threads Threads;
    70  typedef Shape<Base::Tile::kH / Base::Threads::kH / 4,
    71  4,
    72  Base::Tile::kW / Base::Threads::kW,
    73  Base::Tile::kC / Base::kAccessSize>
    75 
    77  struct ThreadOffset {
    79  Coord<4> operator()() const {
    80  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
    81  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
    82 
    83  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
    84  }
    85  };
    86 
    87  public:
    90 };
    91 
    93 
    94 } // namespace gemm
    95 } // namespace cutlass
    Computes the thread offset in (H, W) based on thread ID.
    Definition: igemm_global_tile.h:77
    -
    Definition: convert.h:33
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    33 #pragma once
    34 
    35 #include "cutlass/coord.h"
    37 #include "cutlass/matrix_traits.h"
    38 
    39 namespace cutlass {
    40 namespace gemm {
    41 
    43 
    44 template <GemmOperand::Kind kOperand_,
    45  MatrixLayout::Kind kLayout_,
    46  typename Scalar_,
    47  typename Tile_,
    48  typename Threads_,
    49  int kAccessSize_>
    51  // Which GEMM operand?
    52  kOperand_,
    53  // The layout.
    54  kLayout_,
    55  // The scalar.
    56  Scalar_,
    57  // The tile.
    58  Tile_,
    59  // The threads.
    60  Threads_,
    61  // The number of scalars per LDG/STG.
    62  kAccessSize_> {
    66  typedef typename Base::Threads Threads;
    70  typedef Shape<Base::VectorizedTile::kH / Base::Threads::kH / 4,
    71  4,
    72  Base::VectorizedTile::kW / Base::Threads::kW,
    73  Base::VectorizedTile::kC / Base::kAccessSize>
    75 
    77  struct ThreadOffset {
    79  Coord<4> operator()() const {
    80  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
    81  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
    82 
    83  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
    84  }
    85  };
    86 
    87  public:
    90 };
    91 
    93 
    94 template <typename TileTraits_, typename Index_ = int>
    95 struct IgemmGlobalIteratorAb : public GemmGlobalIteratorAb<TileTraits_, Index_> {
    99  typedef typename TileTraits_::ThreadOffset ThreadOffset;
    100 
    102  CUTLASS_DEVICE IgemmGlobalIteratorAb(typename Base::Params const& _params,
    103  const Coord<3>& bounds,
    104  const Coord<3>& threadblock_offset,
    105  ThreadOffset thread_offset_func = ThreadOffset())
    106  : Base(_params, bounds, threadblock_offset, thread_offset_func), mask_(0xffffffff) {
    107  // The number of elements read in a single iteration.
    108  int const kBlock = TileTraits_::Tile::kW;
    109  // The residue.
    110  int const kResidue = (int)(bounds[1] % kBlock);
    111 
    112  // Compute the number of elements that are valid.
    113  int const left = kResidue - Base::thread_offset[2];
    114  if (left > 0 && left < 4) {
    115  mask_ = (1u << (8 * left)) - 1u;
    116  }
    117  }
    118 
    119  CUTLASS_DEVICE void load_element(
    120  typename Base::AccessType& value, int d, int h, int w, int c) const {
    121  Base::load_element(value, d, h, w, c);
    122  reinterpret_cast<uint32_t&>(value) &= mask_;
    123  }
    124 
    126  uint32_t mask_;
    127 };
    128 
    130 
    131 } // namespace gemm
    132 } // namespace cutlass
    Definition: convert.h:33
    +
    Base::Threads Threads
    The threads.
    Definition: igemm_global_tile.h:66
    +
    Computes the thread offset in (H, W) based on thread ID.
    Definition: igemm_global_tile.h:77
    Defines iterators for efficiently loading and storing to global memory.
    Definition: gemm_global_tile.h:70
    A Coord is a coordinate of arbitrary rank into a tensor or matrix.
    -
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 2-element coordinate.
    Definition: coord.h:241
    -
    Shape< Base::Threads::kH *4, 1, Base::Threads::kW, Base::kAccessSize > Delta
    The strides in each dimension between different loads/stores.
    Definition: igemm_global_tile.h:68
    +
    Shape< Base::VectorizedTile::kH/Base::Threads::kH/4, 4, Base::VectorizedTile::kW/Base::Threads::kW, Base::VectorizedTile::kC/Base::kAccessSize > Iterations
    The number of iterations needed to load/store the tile.
    Definition: igemm_global_tile.h:74
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 2-element coordinate.
    Definition: coord.h:318
    +
    CUTLASS_HOST_DEVICE void load_element(typename Base::AccessType &value, int d, int h, int w, int c) const
    Loads a single fragment element from memory.
    Definition: gemm_global_tile.h:292
    +
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Definition: igemm_global_tile.h:79
    +
    Definition: gemm_global_tile.h:163
    static int const kH
    The height of the cube.
    Definition: shape.h:68
    -
    GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
    The base class.
    Definition: igemm_global_tile.h:64
    -
    Shape< Base::Tile::kH/Base::Threads::kH/4, 4, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
    The number of iterations needed to load/store the tile.
    Definition: igemm_global_tile.h:74
    +
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:399
    +
    Definition: igemm_global_tile.h:50
    +
    CUTLASS_DEVICE void load_element(typename Base::AccessType &value, int d, int h, int w, int c) const
    Definition: igemm_global_tile.h:119
    +
    GemmGlobalIteratorAb< TileTraits_, Index_ > Base
    The base class.
    Definition: igemm_global_tile.h:97
    +
    Definition: igemm_global_tile.h:95
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    -
    Definition: igemm_global_tile.h:50
    +
    Definition: vector.h:62
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    TileTraits_::ThreadOffset ThreadOffset
    The functor to compute the thread offset.
    Definition: igemm_global_tile.h:99
    +
    uint32_t mask_
    The mask to clean up the values.
    Definition: igemm_global_tile.h:126
    +
    ReshapeThreads< VectorizedTile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:88
    +
    CUTLASS_DEVICE IgemmGlobalIteratorAb(typename Base::Params const &_params, const Coord< 3 > &bounds, const Coord< 3 > &threadblock_offset, ThreadOffset thread_offset_func=ThreadOffset())
    Constructor.
    Definition: igemm_global_tile.h:102
    +
    Shape< 1, 4, Base::VectorizedTile::kC > ThreadsDelta
    The threads strides.
    Definition: igemm_global_tile.h:89
    +
    TileTraits_::ThreadOffset ThreadOffset
    The thread offset.
    Definition: gemm_global_tile.h:192
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    -
    Kind
    Definition: matrix_traits.h:36
    +
    Parameters.
    Definition: tile_iterator.h:491
    +
    Kind
    Enumeration defining fundamental contiguous layouts.
    Definition: matrix_traits.h:159
    static int const kAccessSize
    The number of scalars per LDG/STG.
    Definition: gemm_global_tile.h:80
    -
    Kind
    Definition: matrix_traits.h:43
    -
    ReshapeThreads< Tile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:87
    +
    Kind
    Definition: matrix_traits.h:357
    +
    Shape< Base::Threads::kH *4, 1, Base::Threads::kW, Base::kAccessSize > Delta
    The strides in each dimension between different loads/stores.
    Definition: igemm_global_tile.h:68
    Defines properties of matrices used to denote layout and operands to GEMM kernels.
    -
    Shape< 1, 4, Base::Tile::kC > ThreadsDelta
    The threads strides.
    Definition: igemm_global_tile.h:89
    -
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Definition: igemm_global_tile.h:79
    -
    Base::Threads Threads
    The threads.
    Definition: igemm_global_tile.h:66
    +
    Coord< 4 > thread_offset
    Offset of an individual lane from the start of the tile.
    Definition: gemm_global_tile.h:237
    +
    GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
    The base class.
    Definition: igemm_global_tile.h:64
    diff --git a/docs/igemm__multiply__add_8h.html b/docs/igemm__multiply__add_8h.html index 266cb5f16..d67e57b8d 100644 --- a/docs/igemm__multiply__add_8h.html +++ b/docs/igemm__multiply__add_8h.html @@ -82,15 +82,15 @@ $(function() {

    Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction. More...

    -

    @@ -108,7 +110,7 @@ Namespaces

    - - + +

    Classes

    struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
     Template performing matrix multiply-add operation within a thread. More...
    struct  cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int >
     Template performing matrix multiply-add operation within a thread. More...
     
    diff --git a/docs/igemm__multiply__add_8h_source.html b/docs/igemm__multiply__add_8h_source.html index 414c2ce17..b67129ef4 100644 --- a/docs/igemm__multiply__add_8h_source.html +++ b/docs/igemm__multiply__add_8h_source.html @@ -76,29 +76,30 @@ $(function() {
    igemm_multiply_add.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/fragment.h>
    32 
    34 
    35 namespace cutlass {
    36 namespace gemm {
    37 
    39 
    41 template <typename AccumulatorsPerThread_, typename ThreadsPerWarp_>
    42 struct ThreadMultiplyAdd<AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int> {
    46  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
    48  typedef ThreadsPerWarp_ ThreadsPerWarp;
    52  typedef int8_t ScalarA;
    56  typedef int8_t ScalarB;
    60  typedef int ScalarC;
    63 
    65  CUTLASS_DEVICE ThreadMultiplyAdd() {}
    66 
    68  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    69  FragmentB const& b,
    70  Accumulators const& c,
    71  Accumulators& d) {
    72  // The inputs.
    73  int const* a_int = reinterpret_cast<int const*>(&a[0]);
    74  int const* b_int = reinterpret_cast<int const*>(&b[0]);
    75 
    76  for (int j = 0; j < AccumulatorsPerThread::kH; ++j) {
    77  for (int i = 0; i < AccumulatorsPerThread::kW; ++i) {
    78  asm volatile("dp4a.s32.s32 %0, %1, %2, %3;"
    79  : "=r"(d[j * AccumulatorsPerThread::kW + i])
    80  : "r"(a_int[i]), "r"(b_int[j]), "r"(c[j * AccumulatorsPerThread::kW + i]));
    81  }
    82  }
    83  }
    84 };
    85 
    87 
    88 } // namespace gemm
    89 } // namespace cutlass
    -
    Definition: convert.h:33
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include "cutlass/fragment.h"
    32 
    34 
    35 namespace cutlass {
    36 namespace gemm {
    37 
    39 
    41 template <typename ThreadGemmShape_, typename ThreadsPerWarp_>
    42 struct ThreadMultiplyAdd<ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int> {
    46  typedef ThreadGemmShape_ ThreadGemmShape;
    50  typedef ThreadsPerWarp_ ThreadsPerWarp;
    54  typedef int8_t ScalarA;
    58  typedef int8_t ScalarB;
    62  typedef int ScalarC;
    65 
    67  CUTLASS_DEVICE ThreadMultiplyAdd() {}
    68 
    70  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    71  FragmentB const& b,
    72  Accumulators const& c,
    73  Accumulators& d) {
    74  // The inputs.
    75  int const* a_int = reinterpret_cast<int const*>(&a[0]);
    76  int const* b_int = reinterpret_cast<int const*>(&b[0]);
    77 
    78  for (int j = 0; j < AccumulatorsPerThread::kH; ++j) {
    79  for (int i = 0; i < AccumulatorsPerThread::kW; ++i) {
    80  asm volatile("dp4a.s32.s32 %0, %1, %2, %3;"
    81  : "=r"(d[j * AccumulatorsPerThread::kW + i])
    82  : "r"(a_int[i]), "r"(b_int[j]), "r"(c[j * AccumulatorsPerThread::kW + i]));
    83  }
    84  }
    85  }
    86 };
    87 
    89 
    90 } // namespace gemm
    91 } // namespace cutlass
    Definition: convert.h:33
    +
    Fragment< ScalarA, AccumulatorsPerThread::kW *4 > FragmentA
    The fragment for A.
    Definition: igemm_multiply_add.h:56
    Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
    Definition: shape.h:119
    A template defining Fragment Concept.
    Definition: fragment.h:99
    Template implementing matrix multiply-add operations on fragments.
    -
    Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
    The accumulators.
    Definition: igemm_multiply_add.h:62
    -
    ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
    The number of accumulators per warp.
    Definition: igemm_multiply_add.h:50
    -
    Fragment< ScalarB, AccumulatorsPerThread::kH *4 > FragmentB
    The fragment for B.
    Definition: igemm_multiply_add.h:58
    - -
    Shape< 4, 1, 1 > InstructionShape
    The shape of the instruction.
    Definition: igemm_multiply_add.h:44
    -
    ThreadsPerWarp_ ThreadsPerWarp
    The number of threads per warp.
    Definition: igemm_multiply_add.h:48
    -
    AccumulatorsPerThread_ AccumulatorsPerThread
    The number of accumulators per thread.
    Definition: igemm_multiply_add.h:46
    +
    CUTLASS_DEVICE ThreadMultiplyAdd()
    Ctor.
    Definition: igemm_multiply_add.h:67
    +
    CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
    Multiply : d = a*b + c.
    Definition: igemm_multiply_add.h:70
    +
    int ScalarC
    The type for C and D.
    Definition: igemm_multiply_add.h:62
    +
    Shape< 4, 1, 1 > InstructionShape
    The shape of the instruction.
    Definition: igemm_multiply_add.h:44
    +
    ThreadsPerWarp_ ThreadsPerWarp
    The number of threads per warp.
    Definition: igemm_multiply_add.h:50
    +
    ThreadGemmShape_ ThreadGemmShape
    Shape of the thread-level GEMM (K-by-N-by-M)
    Definition: igemm_multiply_add.h:46
    +
    Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
    The accumulators.
    Definition: igemm_multiply_add.h:64
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    -
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:43
    -
    Fragment< ScalarA, AccumulatorsPerThread::kW *4 > FragmentA
    The fragment for A.
    Definition: igemm_multiply_add.h:54
    - - -
    CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
    Multiply : d = a*b + c.
    Definition: igemm_multiply_add.h:68
    +
    ShapeMul< ThreadGemmShape, ThreadsPerWarp >::Shape AccumulatorsPerWarp
    The number of accumulators per warp.
    Definition: igemm_multiply_add.h:52
    +
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:44
    +
    ThreadGemmShape AccumulatorsPerThread
    Aliased for compatibility. Will be removed in CUTLASS v2.0.
    Definition: igemm_multiply_add.h:48
    +
    Fragment< ScalarB, AccumulatorsPerThread::kH *4 > FragmentB
    The fragment for B.
    Definition: igemm_multiply_add.h:60
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    diff --git a/docs/igemm__swizzle_8h.html b/docs/igemm__swizzle_8h.html index a631d215c..c87855219 100644 --- a/docs/igemm__swizzle_8h.html +++ b/docs/igemm__swizzle_8h.html @@ -82,7 +82,7 @@ $(function() {

    Transposes a fragment of data containing packed 8-bit integer elements. More...

    -

    @@ -103,7 +103,7 @@ Namespaces

    @@ -101,7 +101,7 @@ Namespaces diff --git a/docs/igemm__swizzle_8h_source.html b/docs/igemm__swizzle_8h_source.html index 939908301..015b5f9af 100644 --- a/docs/igemm__swizzle_8h_source.html +++ b/docs/igemm__swizzle_8h_source.html @@ -76,14 +76,14 @@ $(function() {
    igemm_swizzle.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/fragment.h>
    31 
    32 namespace cutlass {
    33 namespace gemm {
    34 
    36 
    37 template <typename GlobalIterator_>
    38 struct IgemmSwizzle {
    40  typedef GlobalIterator_ GlobalIterator;
    42  typedef typename GlobalIterator::Fragment Fragment;
    44  typedef typename GlobalIterator::FragmentShape FragmentShape;
    45 
    50 
    53 
    55  static_assert(FragmentShape::kH % 4 == 0 && ShapeCount<FragmentShape>::kWc % 4 == 0,
    56  "Not multiple of 4");
    57 
    59  CUTLASS_DEVICE IgemmSwizzle() {}
    60 
    62  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
    63  // Expose src/dst as int arrays.
    64  int const* src_int = reinterpret_cast<int const*>(&src[0]);
    65  int* dst_int = reinterpret_cast<int*>(&dst[0]);
    66 
    67  // Transpose the data.
    68  for (int d = 0; d < FragmentShape::kD; ++d) {
    69  for (int h = 0; h < FragmentShape::kH / 4; ++h) {
    70  for (int w = 0; w < ShapeCount<FragmentShape>::kWc / 4; ++w) {
    71  int const i0 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    72  (4 * h + 0) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    73  int const i1 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    74  (4 * h + 1) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    75  int const i2 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    76  (4 * h + 2) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    77  int const i3 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    78  (4 * h + 3) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    79 
    80  int a0 = src_int[i0];
    81  int a1 = src_int[i1];
    82  int a2 = src_int[i2];
    83  int a3 = src_int[i3];
    84 
    85  int b0, b1, b2, b3, c0;
    86  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(b0) : "r"(a0), "r"(a1));
    87  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(c0) : "r"(a2), "r"(a3));
    88  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(b0), "r"(c0));
    89 
    90  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(b1) : "r"(a0), "r"(a1));
    91  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(c0) : "r"(a2), "r"(a3));
    92  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b1) : "r"(b1), "r"(c0));
    93 
    94  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(b2) : "r"(a0), "r"(a1));
    95  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(c0) : "r"(a2), "r"(a3));
    96  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b2) : "r"(b2), "r"(c0));
    97 
    98  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(b3) : "r"(a0), "r"(a1));
    99  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(c0) : "r"(a2), "r"(a3));
    100  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b3) : "r"(b3), "r"(c0));
    101 
    102  dst_int[i0] = b0;
    103  dst_int[i1] = b1;
    104  dst_int[i2] = b2;
    105  dst_int[i3] = b3;
    106  }
    107  }
    108  }
    109  }
    110 };
    111 
    113 
    114 } // namespace gemm
    115 } // namespace cutlass
    Definition: convert.h:33
    -
    std::is_same (false specialization)
    Definition: platform.h:412
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include "cutlass/fragment.h"
    31 
    32 namespace cutlass {
    33 namespace gemm {
    34 
    36 
    37 template <typename GlobalIterator_>
    38 struct IgemmSwizzle {
    40  typedef GlobalIterator_ GlobalIterator;
    42  typedef typename GlobalIterator::Fragment Fragment;
    44  typedef typename GlobalIterator::FragmentShape FragmentShape;
    45 
    50 
    53 
    55  static_assert(FragmentShape::kH % 4 == 0 && ShapeCount<FragmentShape>::kWc % 4 == 0,
    56  "Not multiple of 4");
    57 
    59  CUTLASS_DEVICE IgemmSwizzle() {}
    60 
    62  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
    63  // Expose src/dst as int arrays.
    64  int const* src_int = reinterpret_cast<int const*>(&src[0]);
    65  int* dst_int = reinterpret_cast<int*>(&dst[0]);
    66 
    67  // Transpose the data.
    68  for (int d = 0; d < FragmentShape::kD; ++d) {
    69  for (int h = 0; h < FragmentShape::kH / 4; ++h) {
    70  for (int w = 0; w < ShapeCount<FragmentShape>::kWc / 4; ++w) {
    71  int const i0 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    72  (4 * h + 0) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    73  int const i1 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    74  (4 * h + 1) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    75  int const i2 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    76  (4 * h + 2) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    77  int const i3 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
    78  (4 * h + 3) * (ShapeCount<FragmentShape>::kWc / 4) + w;
    79 
    80  int a0 = src_int[i0];
    81  int a1 = src_int[i1];
    82  int a2 = src_int[i2];
    83  int a3 = src_int[i3];
    84 
    85  // // DEBUG.
    86  // if (threadIdx.x == 0) {
    87  // printf("a=0x%08x 0x%08x 0x%08x 0x%08x\n", a0, a1, a2, a3);
    88  // }
    89 
    90  int b0, b1, b2, b3, c0;
    91  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(b0) : "r"(a0), "r"(a1));
    92  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(c0) : "r"(a2), "r"(a3));
    93  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(b0), "r"(c0));
    94 
    95  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(b1) : "r"(a0), "r"(a1));
    96  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(c0) : "r"(a2), "r"(a3));
    97  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b1) : "r"(b1), "r"(c0));
    98 
    99  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(b2) : "r"(a0), "r"(a1));
    100  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(c0) : "r"(a2), "r"(a3));
    101  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b2) : "r"(b2), "r"(c0));
    102 
    103  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(b3) : "r"(a0), "r"(a1));
    104  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(c0) : "r"(a2), "r"(a3));
    105  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b3) : "r"(b3), "r"(c0));
    106 
    107  // // DEBUG.
    108  // if (threadIdx.x == 0) {
    109  // printf("b=0x%08x 0x%08x 0x%08x 0x%08x\n", b0, b1, b2, b3);
    110  // }
    111 
    112  dst_int[i0] = b0;
    113  dst_int[i1] = b1;
    114  dst_int[i2] = b2;
    115  dst_int[i3] = b3;
    116  }
    117  }
    118  }
    119  }
    120 };
    121 
    123 
    124 } // namespace gemm
    125 } // namespace cutlass
    Definition: convert.h:33
    +
    std::is_same (false specialization)
    Definition: platform.h:420
    GlobalIterator::FragmentShape FragmentShape
    The shape of the source fragment.
    Definition: igemm_swizzle.h:44
    Definition: igemm_swizzle.h:38
    GlobalIterator_ GlobalIterator
    The global iterator.
    Definition: igemm_swizzle.h:40
    CUTLASS_DEVICE void transform(Fragment const &src, Fragment &dst)
    Transform a fragment.
    Definition: igemm_swizzle.h:62
    Fragment OutputFragment
    The destination fragment.
    Definition: igemm_swizzle.h:49
    -
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    #define static_assert(__e, __m)
    Definition: platform.h:153
    Fragment InputFragment
    The source fragment.
    Definition: igemm_swizzle.h:47
    GlobalIterator::Fragment Fragment
    The source fragment.
    Definition: igemm_swizzle.h:42
    CUTLASS_DEVICE IgemmSwizzle()
    The src/dst must be int8 fragments.
    Definition: igemm_swizzle.h:59
    @@ -92,7 +92,7 @@ $(function() {
    diff --git a/docs/igemm__traits_8h.html b/docs/igemm__traits_8h.html index 32d14d876..897687ee2 100644 --- a/docs/igemm__traits_8h.html +++ b/docs/igemm__traits_8h.html @@ -82,34 +82,38 @@ $(function() {

    Defies structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8bit integers, accumulators are assumed to be 32b signed integers, and output formats vary. More...

    -
    - + - + - + - + - + - + + + + + @@ -123,13 +127,13 @@ Classes - + - +

    Classes

    struct  cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >
    struct  cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, ThreadGemmShape_ >
     
    struct  cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >
    struct  cutlass::gemm::IgemmConfig< OutputTile_, int8_t, ThreadGemmShape_ >
     
    struct  cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ >
    struct  cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_, Index_ >
     
    struct  cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
    struct  cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_, Index_ >
     
    struct  cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ >
    struct  cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_, Index_ >
     
    struct  cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
    struct  cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_, Index_ >
     
    struct  cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_, Index_ >
     
    struct  cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_, Index_ >
     
    struct  cutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ >
     
     
    struct  cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    struct  cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, ThreadGemmShape_, Index_ >
     
    struct  cutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
     
    struct  cutlass::gemm::IgemmEpilogueScalar< int >
     
    struct  cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ >
    struct  cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, ThreadGemmShape_, Index_, Helper_ >
     
    diff --git a/docs/igemm__traits_8h_source.html b/docs/igemm__traits_8h_source.html index ecdd4f1df..e1fa87e40 100644 --- a/docs/igemm__traits_8h_source.html +++ b/docs/igemm__traits_8h_source.html @@ -76,89 +76,108 @@ $(function() {
    igemm_traits.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    30 #pragma once
    31 
    32 #include <cutlass/convert.h>
    33 #include <cutlass/gemm/gemm.h>
    43 #include <cutlass/reshape_tile.h>
    44 
    45 namespace cutlass {
    46 namespace gemm {
    47 
    49 
    50 template <
    52  typename OutputTile_,
    54  typename ScalarD_,
    56  typename AccumulatorsPerThread_>
    58  : public GemmConfig<
    60  int8_t,
    62  int8_t,
    64  ScalarD_,
    66  ScalarD_,
    68  OutputTile_,
    70  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
    72  4,
    74  4,
    76  16,
    78  4,
    80  4,
    82  16,
    84  1,
    86  4,
    88  1,
    90  2> {};
    91 
    93 
    94 template <typename OutputTile_, typename AccumulatorsPerThread_>
    95 struct IgemmConfig<OutputTile_, int8_t, AccumulatorsPerThread_>
    96  : public GemmConfig<
    98  int8_t,
    100  int8_t,
    102  int8_t,
    104  int8_t,
    106  OutputTile_,
    108  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
    110  4,
    112  4,
    114  16,
    116  4,
    118  4,
    120  16,
    122  4,
    124  4,
    126  4,
    128  2> {};
    129 
    131 
    132 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    133 struct IgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
    134 
    136 
    137 template <typename GemmConfig_>
    138 struct IgemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_>
    139  : public GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
    142 
    144  static int const kScalarsPerStsA = 16;
    145 
    149  // The layout.
    151  // The pointer is float const.
    152  int8_t const,
    153  // The tile has size KxM in GEMM's terminology.
    155  // The threads are distributed as warps x 32 (the traits may reorganize).
    157  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    158  4>
    160 
    163  // The pointer is float.
    164  int8_t,
    165  // The tile has size KxM in GEMM's terminology.
    166  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>,
    167  // The threads are distributed as warps x 32 (the traits may reorganize).
    168  typename GlobalTileTraits::Threads,
    169  // The number of scalars per STS (STS.32 or STS.128, etc).
    170  kScalarsPerStsA>
    172 };
    173 
    175 
    176 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    177 struct IgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
    178 
    180 
    181 template <typename GemmConfig_>
    182 struct IgemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_>
    183  : public GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
    186 
    188  static int const kScalarsPerStsB = 16;
    189 
    193  // The layout.
    195  // The pointer is float const.
    196  int8_t const,
    197  // The tile has size KxM in GEMM's terminology.
    199  // The threads are distributed as warps x 32 (the traits may reorganize).
    201  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    202  4>
    204 
    207  // The pointer is float.
    208  int8_t,
    209  // The tile has size KxM in GEMM's terminology.
    210  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>,
    211  // The threads are distributed as warps x 32 (the traits may reorganize).
    212  typename GlobalTileTraits::Threads,
    213  // The number of scalars per STS (STS.32 or STS.128, etc).
    214  kScalarsPerStsB>
    216 };
    217 
    219 
    220 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    222 
    223 template <typename Iterator_>
    224 struct IgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
    226 };
    227 
    228 template <typename Iterator_>
    229 struct IgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
    231 };
    232 
    234 
    235 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    237 
    238 template <typename Iterator_>
    239 struct IgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
    241 };
    242 
    243 template <typename Iterator_>
    244 struct IgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
    246 };
    247 
    249 
    250 template <
    252  MatrixLayout::Kind kLayoutA_,
    254  MatrixLayout::Kind kLayoutB_,
    256  typename OutputTile_,
    258  typename ScalarD_,
    260  typename EpilogueFunctor_,
    262  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
    264  typename Index_ = int>
    272 
    277  typedef typename IgemmTransformerA<GemmTileTraitsHelperA::kLayout,
    280  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
    281  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
    288 
    292  // The default transformer for B.
    293  typedef typename IgemmTransformerB<GemmTileTraitsHelperB::kLayout,
    296  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
    297  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
    304 
    306  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
    307  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
    315  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
    316  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
    323 
    328 
    331 };
    332 
    334 
    335 template <typename ScalarD_>
    337  typedef float Scalar;
    338 };
    339 
    340 template <>
    341 struct IgemmEpilogueScalar<int> {
    342  typedef int Scalar;
    343 };
    344 
    346 
    347 template <
    349  MatrixLayout::Kind kLayoutA_,
    351  MatrixLayout::Kind kLayoutB_,
    353  typename OutputTile_ = Shape<32, 128, 128>,
    355  typename ScalarD_ = int,
    359  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
    361  typename Index_ = int,
    363  typename Helper_ = IgemmTraitsHelper<kLayoutA_,
    364  kLayoutB_,
    365  OutputTile_,
    366  ScalarD_,
    367  EpilogueFunctor_,
    368  AccumulatorsPerThread_,
    369  Index_> >
    370 struct IgemmTraits : public GemmTraits<
    371  // The config.
    372  typename Helper_::GemmConfig,
    373  // The stream to load A from global memory to shared memory.
    374  typename Helper_::GlobalLoadStreamA,
    375  // The stream to load B from global memory to shared memory.
    376  typename Helper_::GlobalLoadStreamB,
    377  // The stream to load A from shared memory.
    378  typename Helper_::SharedLoadStreamA,
    379  // The stream to load B from shared memory.
    380  typename Helper_::SharedLoadStreamB,
    381  // The epilogue.
    382  typename Helper_::Epilogue,
    383  // The block swizzle to reorganize the grid.
    384  IdentityBlockSwizzle,
    385  // The index.
    386  Index_,
    387  // The tool used to clear accumulators.
    388  typename Helper_::ClearAccumulators> {};
    389 
    391 
    392 } // namespace gemm
    393 } // namespace cutlass
    Definition: load_store.h:42
    -
    TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
    The iterator to load B from shared memory.
    Definition: igemm_traits.h:319
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    30 #pragma once
    31 
    32 #include "cutlass/convert.h"
    33 #include "cutlass/gemm/gemm.h"
    43 #include "cutlass/reshape_tile.h"
    44 
    45 namespace cutlass {
    46 namespace gemm {
    47 
    49 
    50 template <
    52  typename OutputTile_,
    54  typename ScalarD_,
    56  typename ThreadGemmShape_>
    57 struct IgemmConfig : public GemmConfig<
    59  int8_t,
    61  int8_t,
    63  ScalarD_,
    65  ScalarD_,
    67  OutputTile_,
    69  ThreadMultiplyAdd<ThreadGemmShape_, Shape<1, 4, 8>, int8_t, int8_t, int>,
    71  4,
    73  4,
    75  16,
    77  4,
    79  4,
    81  16,
    83  1,
    85  4,
    87  1,
    89  2,
    91  false,
    93  false,
    95  false> {};
    96 
    98 
    99 template <typename OutputTile_, typename ThreadGemmShape_>
    100 struct IgemmConfig<OutputTile_, int8_t, ThreadGemmShape_>
    101  : public GemmConfig<
    103  int8_t,
    105  int8_t,
    107  int8_t,
    109  int8_t,
    111  OutputTile_,
    113  ThreadMultiplyAdd<ThreadGemmShape_, Shape<1, 4, 8>, int8_t, int8_t, int>,
    115  4,
    117  4,
    119  16,
    121  4,
    123  4,
    125  16,
    127  4,
    129  4,
    131  4,
    133  2,
    135  false,
    137  true,
    139  false> {};
    140 
    142 
    143 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_, typename Index_>
    144 struct IgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
    145 
    147 
    148 template <typename GemmConfig_, typename Index_>
    149 struct IgemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_, Index_>
    150  : public GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
    153 
    155  static int const kScalarsPerStsA = 16;
    156 
    158  typedef IgemmGlobalTileTraits<
    160  // The layout.
    162  // The pointer is float const.
    163  int8_t const,
    164  // The tile has size KxM in GEMM's terminology.
    166  // The threads are distributed as warps x 32 (the traits may reorganize).
    168  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    169  GemmConfig_::kScalarsPerLdgA>
    171 
    174 
    177  // The pointer is float.
    178  int8_t,
    179  // The tile has size KxM in GEMM's terminology.
    180  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>,
    181  // The threads are distributed as warps x 32 (the traits may reorganize).
    182  typename GlobalTileTraits::Threads,
    183  // The number of scalars per STS (STS.32 or STS.128, etc).
    184  kScalarsPerStsA>
    186 };
    187 
    189 
    190 template <typename GemmConfig_, typename Index_>
    191 struct IgemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_, Index_> {
    194 
    196  typedef int8_t Scalar;
    198  typedef int8_t MultiplyAddScalar;
    199 
    201  static int const kScalarsPerStsA = 16;
    202 
    204  typedef IgemmGlobalTileTraits<
    206  // The layout.
    208  // The pointer is float const.
    209  int8_t const,
    210  // The tile has size NxK in GEMM's terminology.
    212  // The threads are distributed as warps x 32 (the traits may reorganize).
    214  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    215  GemmConfig_::kScalarsPerLdgA>
    217 
    220 
    223  // The pointer is int8.
    224  int8_t,
    225  // The tile has size KxN in GEMM's terminology.
    226  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>,
    227  // The threads are distributed as (threads / K) x K (the traits may reorganize).
    228  typename GlobalTileTraits::Threads,
    229  // The number of scalars per STS.
    230  kScalarsPerStsA,
    231  // The skew to avoid bank conflicts added in the tile W dimension.
    232  16>
    234 
    237  // The pointer is float const.
    238  int8_t const,
    239  // The output tile size.
    240  typename GemmConfig_::OutputTile,
    241  // The number of warps.
    242  typename GemmConfig_::Warps,
    243  // The number of threads per warp.
    244  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
    245  // The shape of the FMA instruction.
    246  typename GemmConfig_::InstructionShape,
    247  // The number of stages.
    248  GemmConfig_::kStages,
    249  // The number of scalars per LDS.
    250  16,
    251  // The skew.
    252  SharedStoreTileTraits::kSkew>
    254 };
    255 
    257 
    258 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_, typename Index_>
    259 struct IgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
    260 
    262 
    263 template <typename GemmConfig_, typename Index_>
    264 struct IgemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_, Index_> {
    267 
    269  typedef int8_t Scalar;
    271  typedef int8_t MultiplyAddScalar;
    272 
    274  static int const kScalarsPerStsB = 16;
    275 
    277  typedef IgemmGlobalTileTraits<
    279  // The layout.
    281  // The pointer is float const.
    282  int8_t const,
    283  // The tile has size NxK in GEMM's terminology.
    285  // The threads are distributed as warps x 32 (the traits may reorganize).
    287  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    288  GemmConfig_::kScalarsPerLdgB>
    290 
    293 
    296  // The pointer is int8.
    297  int8_t,
    298  // The tile has size KxN in GEMM's terminology.
    299  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>,
    300  // The threads are distributed as (threads / K) x K (the traits may reorganize).
    301  typename GlobalTileTraits::Threads,
    302  // The number of scalars per STS.
    303  kScalarsPerStsB,
    304  // The skew to avoid bank conflicts added in the tile W dimension.
    305  16>
    307 
    310  // The pointer is float const.
    311  int8_t const,
    312  // The output tile size.
    313  typename GemmConfig_::OutputTile,
    314  // The number of warps.
    315  typename GemmConfig_::Warps,
    316  // The number of threads per warp.
    317  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
    318  // The shape of the FMA instruction.
    319  typename GemmConfig_::InstructionShape,
    320  // The number of stages.
    321  GemmConfig_::kStages,
    322  // The number of scalars per LDS.
    323  16,
    324  // The skew.
    325  SharedStoreTileTraits::kSkew>
    327 };
    328 
    330 
    331 template <typename GemmConfig_, typename Index_>
    332 struct IgemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_, Index_>
    333  : public GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
    336 
    338  static int const kScalarsPerStsB = 16;
    339 
    341  typedef IgemmGlobalTileTraits<
    343  // The layout.
    345  // The pointer is float const.
    346  int8_t const,
    347  // The tile has size KxM in GEMM's terminology.
    349  // The threads are distributed as warps x 32 (the traits may reorganize).
    351  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    352  GemmConfig_::kScalarsPerLdgB>
    354 
    357 
    360  // The pointer is float.
    361  int8_t,
    362  // The tile has size KxM in GEMM's terminology.
    363  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>,
    364  // The threads are distributed as warps x 32 (the traits may reorganize).
    365  typename GlobalTileTraits::Threads,
    366  // The number of scalars per STS (STS.32 or STS.128, etc).
    367  kScalarsPerStsB>
    369 };
    370 
    372 
    373 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    375 
    376 template <typename Iterator_>
    377 struct IgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
    379 };
    380 
    381 template <typename Iterator_>
    382 struct IgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
    384 };
    385 
    387 
    388 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
    390 
    391 template <typename Iterator_>
    392 struct IgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
    394 };
    395 
    396 template <typename Iterator_>
    397 struct IgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
    399 };
    400 
    402 
    403 template <
    405  MatrixLayout::Kind kLayoutA_,
    407  MatrixLayout::Kind kLayoutB_,
    409  typename OutputTile_,
    411  typename ScalarD_,
    413  typename EpilogueFunctor_,
    415  typename ThreadGemmShape_ = Shape<32, 8, 8>,
    417  typename Index_ = int>
    425 
    427  typedef typename GemmTileTraitsHelperA::GlobalLoadIterator GlobalLoadIteratorA;
    429  typedef typename IgemmTransformerA<GemmTileTraitsHelperA::kLayout,
    432  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
    433  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
    443 
    445  typedef typename GemmTileTraitsHelperB::GlobalLoadIterator GlobalLoadIteratorB;
    446  // The default transformer for B.
    447  typedef typename IgemmTransformerB<GemmTileTraitsHelperB::kLayout,
    450  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
    451  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
    461 
    463  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
    464  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
    472  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
    473  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
    480 
    485 
    488 };
    489 
    491 
    492 template <typename ScalarD_>
    494  typedef float Scalar;
    495 };
    496 
    497 template <>
    498 struct IgemmEpilogueScalar<int> {
    499  typedef int Scalar;
    500 };
    501 
    503 
    504 template <
    506  MatrixLayout::Kind kLayoutA_,
    508  MatrixLayout::Kind kLayoutB_,
    510  typename OutputTile_ = Shape<32, 128, 128>,
    512  typename ScalarD_ = int,
    516  typename ThreadGemmShape_ = Shape<32, 8, 8>,
    518  typename Index_ = int,
    520  typename Helper_ = IgemmTraitsHelper<kLayoutA_,
    521  kLayoutB_,
    522  OutputTile_,
    523  ScalarD_,
    524  EpilogueFunctor_,
    525  ThreadGemmShape_,
    526  Index_> >
    527 struct IgemmTraits : public GemmTraits<
    528  // The config.
    529  typename Helper_::GemmConfig,
    530  // The stream to load A from global memory to shared memory.
    531  typename Helper_::GlobalLoadStreamA,
    532  // The stream to load B from global memory to shared memory.
    533  typename Helper_::GlobalLoadStreamB,
    534  // The stream to load A from shared memory.
    535  typename Helper_::SharedLoadStreamA,
    536  // The stream to load B from shared memory.
    537  typename Helper_::SharedLoadStreamB,
    538  // The epilogue.
    539  typename Helper_::Epilogue,
    540  // The block swizzle to reorganize the grid.
    541  IdentityBlockSwizzle,
    542  // The index.
    543  Index_,
    544  // The tool used to clear accumulators.
    545  typename Helper_::ClearAccumulators> {};
    546 
    548 
    549 } // namespace gemm
    550 } // namespace cutlass
    IgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
    Definition: igemm_traits.h:448
    +
    Definition: load_store.h:41
    +
    GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Base
    The base config.
    Definition: igemm_traits.h:335
    Definition: convert.h:33
    -
    IgemmSwizzle< Iterator_ > Transformer
    Definition: igemm_traits.h:230
    +
    Definition: gemm_shared_tile.h:128
    +
    Base::Threads Threads
    The threads.
    Definition: igemm_global_tile.h:66
    +
    IgemmTileTraitsHelperB< kLayoutB_, GemmConfig, Index_ > GemmTileTraitsHelperB
    The GEMM config for B.
    Definition: igemm_traits.h:424
    + +
    IgemmSwizzle< Iterator_ > Transformer
    Definition: igemm_traits.h:383
    Defines iterators for efficiently loading and storing to global memory.
    -
    GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
    The iterator to load A from global memory.
    Definition: igemm_traits.h:275
    Transposes a fragment of data containing packed 8-bit integer elements.
    -
    Copy< typename Iterator_::Fragment > Transformer
    Definition: igemm_traits.h:240
    +
    Copy< typename Iterator_::Fragment > Transformer
    Definition: igemm_traits.h:393
    +
    GemmSharedStoreWithSkewTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA, 16 > SharedStoreTileTraits
    The traits class to build the iterator to store data to shared memory for A^N.
    Definition: igemm_traits.h:233
    +
    IgemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for B^T.
    Definition: igemm_traits.h:289
    Defines structural properties of complete GEMM computation.
    -
    GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
    The stream to load B from global memory to shared memory.
    Definition: igemm_traits.h:303
    -
    Definition: igemm_traits.h:133
    -
    TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
    The iterator to store B to shared memory.
    Definition: igemm_traits.h:300
    -
    IgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
    Definition: igemm_traits.h:294
    +
    IgemmGlobalIteratorAb< GlobalTileTraits, Index_ > GlobalLoadIterator
    The global load iterator.
    Definition: igemm_traits.h:219
    +
    Definition: igemm_traits.h:144
    Definition: igemm_epilogue.h:290
    -
    IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for B^T.
    Definition: igemm_traits.h:203
    Definition: convert.h:69
    -
    GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Base
    The base config.
    Definition: igemm_traits.h:141
    -
    IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > GemmConfig
    The IGEMM config.
    Definition: igemm_traits.h:267
    +
    IgemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for B^T.
    Definition: igemm_traits.h:353
    Definition: gemm_shared_tile.h:38
    -
    Definition: tile_iterator.h:62
    +
    Definition: tile_iterator.h:65
    +
    int8_t MultiplyAddScalar
    The scalar stored in shared memory.
    Definition: igemm_traits.h:198
    +
    GemmTileTraitsHelperB::GlobalLoadIterator GlobalLoadIteratorB
    The iterator to load B from global memory.
    Definition: igemm_traits.h:445
    Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction.
    -
    Definition: gemm_global_tile.h:159
    -
    GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsB > SharedStoreTileTraits
    The traits class to build the iterator to store data to shared memory for B^N.
    Definition: igemm_traits.h:215
    +
    Definition: gemm_shared_tile.h:200
    +
    TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
    The iterator to store B to shared memory.
    Definition: igemm_traits.h:454
    +
    GemmSharedLoadTileBTraits< int8_t const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 16, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
    The traits class to build the iterator to load from shared memory for B^N.
    Definition: igemm_traits.h:326
    +
    Definition: gemm_global_tile.h:163
    +
    int8_t MultiplyAddScalar
    The scalar stored in shared memory.
    Definition: igemm_traits.h:271
    Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
    -
    Definition: gemm_global_stream.h:161
    -
    Definition: gemm_traits.h:273
    -
    GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
    The iterator to load B from global memory.
    Definition: igemm_traits.h:291
    -
    IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for A^N.
    Definition: igemm_traits.h:159
    -
    int Scalar
    Definition: igemm_traits.h:342
    -
    IgemmSwizzle< Iterator_ > Transformer
    Definition: igemm_traits.h:245
    -
    Describes layouts of matrices.
    Definition: matrix_traits.h:35
    -
    IgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
    The GEMM config for B.
    Definition: igemm_traits.h:271
    +
    IgemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for A^T.
    Definition: igemm_traits.h:216
    +
    Definition: gemm_global_stream.h:52
    +
    Definition: gemm_traits.h:191
    +
    IgemmEpilogue< IgemmEpilogueTraits< GemmConfig, EpilogueFunctor_ > > Epilogue
    The epilogue.
    Definition: igemm_traits.h:487
    +
    int Scalar
    Definition: igemm_traits.h:499
    +
    IgemmSwizzle< Iterator_ > Transformer
    Definition: igemm_traits.h:398
    +
    Defines data layouts of various matrix formats usable by TensorRef and other classes.
    Definition: matrix_traits.h:156
    +
    GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA > SharedStoreTileTraits
    The traits class to build the iterator to store data to shared memory for A^N.
    Definition: igemm_traits.h:185
    Definition: igemm_swizzle.h:38
    -
    Definition: igemm_traits.h:177
    -
    Definition: igemm_traits.h:265
    -
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:302
    -
    GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
    The stream to load A from global memory to shared memory.
    Definition: igemm_traits.h:287
    -
    SharedLoadStream< SharedLoadIteratorB, Copy< typename SharedLoadIteratorB::Fragment > > SharedLoadStreamB
    The stream to load B from shared memory.
    Definition: igemm_traits.h:322
    +
    Definition: igemm_traits.h:259
    +
    Definition: igemm_traits.h:418
    +
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:399
    +
    IgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
    The default transformer for A.
    Definition: igemm_traits.h:430
    Defines iterators for efficiently loading and storing tiles to and from shared memory.
    -
    Definition: matrix_traits.h:36
    -
    IgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
    The GEMM config for A.
    Definition: igemm_traits.h:269
    -
    Definition: gemm_shared_stream.h:44
    +
    GlobalLoadStream< GemmOperand::kB, GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
    The stream to load B from global memory to shared memory.
    Definition: igemm_traits.h:460
    +
    Definition: matrix_traits.h:159
    +
    Definition: gemm_shared_stream.h:45
    +
    Definition: igemm_global_tile.h:50
    Defines a type for restructuring a tile.
    -
    TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
    The iterator to load A from shared memory.
    Definition: igemm_traits.h:310
    -
    ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
    The object to clear accumulators.
    Definition: igemm_traits.h:327
    -
    Definition: gemm_traits.h:79
    -
    Definition: gemm_traits.h:137
    -
    Definition: matrix_traits.h:43
    +
    GemmTileTraitsHelperA::GlobalLoadIterator GlobalLoadIteratorA
    The iterator to load A from global memory.
    Definition: igemm_traits.h:427
    +
    Definition: gemm_config.h:76
    +
    TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
    The iterator to store A to shared memory.
    Definition: igemm_traits.h:436
    +
    Definition: gemm_traits.h:52
    +
    Definition: matrix_traits.h:357
    Definition: igemm_traits.h:57
    -
    Definition: igemm_traits.h:221
    -
    Definition: igemm_global_tile.h:50
    -
    float Scalar
    Definition: igemm_traits.h:337
    -
    Definition: gemm_traits.h:428
    -
    Copy< typename Iterator_::Fragment > Transformer
    Definition: igemm_traits.h:225
    -
    Definition: igemm_traits.h:370
    +
    IgemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
    The traits class to build the iterator to load data from global memory for A^N.
    Definition: igemm_traits.h:170
    +
    Definition: igemm_global_tile.h:95
    +
    Definition: igemm_traits.h:374
    +
    float Scalar
    Definition: igemm_traits.h:494
    +
    Definition: gemm_traits.h:349
    +
    Copy< typename Iterator_::Fragment > Transformer
    Definition: igemm_traits.h:378
    +
    Definition: igemm_traits.h:527
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    -
    GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA > SharedStoreTileTraits
    The traits class to build the iterator to store data to shared memory for A^N.
    Definition: igemm_traits.h:171
    - -
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:43
    -
    Definition: matrix_traits.h:36
    - -
    IgemmEpilogue< IgemmEpilogueTraits< GemmConfig, EpilogueFunctor_ > > Epilogue
    The epilogue.
    Definition: igemm_traits.h:330
    -
    IgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
    The default transformer for A.
    Definition: igemm_traits.h:278
    -
    Kind
    Definition: matrix_traits.h:36
    -
    Definition: igemm_traits.h:236
    -
    TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
    The iterator to store A to shared memory.
    Definition: igemm_traits.h:284
    -
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:40
    -
    Definition: matrix_traits.h:43
    +
    TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
    The iterator to load B from shared memory.
    Definition: igemm_traits.h:476
    +
    GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsB > SharedStoreTileTraits
    The traits class to build the iterator to store data to shared memory for B^N.
    Definition: igemm_traits.h:368
    +
    ReshapeThreads< VectorizedTile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:88
    + +
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:44
    +
    Definition: matrix_traits.h:159
    + + +
    IgemmConfig< OutputTile_, ScalarD_, ThreadGemmShape_ > GemmConfig
    The IGEMM config.
    Definition: igemm_traits.h:420
    +
    IgemmGlobalIteratorAb< GlobalTileTraits, Index_ > GlobalLoadIterator
    The global load iterator.
    Definition: igemm_traits.h:292
    +
    Kind
    Enumeration defining fundamental contiguous layouts.
    Definition: matrix_traits.h:159
    +
    GemmGlobalIteratorAb< GlobalTileTraits, Index_ > GlobalLoadIterator
    The global load iterator.
    Definition: igemm_traits.h:173
    +
    GemmGlobalIteratorAb< GlobalTileTraits, Index_ > GlobalLoadIterator
    The global load iterator.
    Definition: igemm_traits.h:356
    +
    GemmConfig::MultiplyAdd MultiplyAdd
    The multiply-add functor.
    Definition: igemm_traits.h:482
    +
    Definition: igemm_traits.h:389
    +
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:51
    +
    SharedLoadStream< SharedLoadIteratorA, Copy< typename SharedLoadIteratorA::Fragment > > SharedLoadStreamA
    The stream to load A from shared memory.
    Definition: igemm_traits.h:470
    +
    Definition: matrix_traits.h:357
    +
    GlobalLoadStream< GemmOperand::kA, GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
    The stream to load A from global memory to shared memory.
    Definition: igemm_traits.h:442
    +
    IgemmTileTraitsHelperA< kLayoutA_, GemmConfig, Index_ > GemmTileTraitsHelperA
    The GEMM config for A.
    Definition: igemm_traits.h:422
    Implements a software-pipelined efficient GEMM.
    -
    ReshapeThreads< Tile, Threads_ >::Threads Threads
    The threads shape.
    Definition: gemm_global_tile.h:87
    +
    GemmSharedLoadTileATraits< int8_t const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 16, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
    The traits class to build the iterator to load from shared memory for A^N.
    Definition: igemm_traits.h:253
    +
    SharedLoadStream< SharedLoadIteratorB, Copy< typename SharedLoadIteratorB::Fragment > > SharedLoadStreamB
    The stream to load B from shared memory.
    Definition: igemm_traits.h:479
    Defines structural properties of the GEMM epilogue.
    -
    Definition: igemm_traits.h:336
    +
    Definition: igemm_traits.h:493
    Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point o...
    Defines conversion operations among Fragments of different base type.
    -
    GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Base
    The base config.
    Definition: igemm_traits.h:185
    -
    SharedLoadStream< SharedLoadIteratorA, Copy< typename SharedLoadIteratorA::Fragment > > SharedLoadStreamA
    The stream to load A from shared memory.
    Definition: igemm_traits.h:313
    +
    GemmSharedStoreWithSkewTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsB, 16 > SharedStoreTileTraits
    The traits class to build the iterator to store data to shared memory for B^N.
    Definition: igemm_traits.h:306
    +
    Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load ea...
    -
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:620
    -
    GemmConfig::MultiplyAdd MultiplyAdd
    The multiply-add functor.
    Definition: igemm_traits.h:325
    +
    TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
    The iterator to load A from shared memory.
    Definition: igemm_traits.h:467
    +
    GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Base
    The base config.
    Definition: igemm_traits.h:152
    +
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:836
    +
    ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
    The object to clear accumulators.
    Definition: igemm_traits.h:484
    diff --git a/docs/index.html b/docs/index.html index f2ba68993..6fab15e10 100644 --- a/docs/index.html +++ b/docs/index.html @@ -75,7 +75,7 @@ $(function() {
    diff --git a/docs/iterator__access_8h.html b/docs/iterator__access_8h.html index cc41cd5af..06fd90ad6 100644 --- a/docs/iterator__access_8h.html +++ b/docs/iterator__access_8h.html @@ -82,10 +82,9 @@ $(function() {

    Free functions for loading and storing to implementations of tile iteartor concepts. More...

    -

    @@ -142,7 +146,7 @@ Namespaces

    @@ -98,76 +97,15 @@ Namespaces Functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Loads a fragment from an input iterator, masked by a predicate iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator &iterator, Fragment &fragment)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_DEVICE void cutlass::shared_iterator_store (OutputIterator &iterator, Fragment const &fragment)
     Stores a fragment to a shared memory output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    diff --git a/docs/iterator__access_8h_source.html b/docs/iterator__access_8h_source.html index 11289a933..fac9ea1e5 100644 --- a/docs/iterator__access_8h_source.html +++ b/docs/iterator__access_8h_source.html @@ -76,30 +76,18 @@ $(function() {
    iterator_access.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    31 #include <cutlass/load_store.h>
    33 #include <cutlass/shape.h>
    34 
    35 namespace cutlass {
    36 
    38 
    40 template <typename InputIterator, typename Fragment>
    41 CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) {
    42  typename InputIterator::FragmentIterator frag_iterator(fragment);
    43  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
    44  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
    45  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
    46  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
    47  if (iterator.valid(d, h, w, c)) {
    48  int const offset =
    50  0, 0, w, c);
    52  load(reinterpret_cast<typename InputIterator::AccessType &>(
    53  frag_iterator.at(d, h, w, c)),
    54  iterator.data(),
    55  offset);
    56  }
    57  }
    58  if (w < InputIterator::Iterations::kW - 1) {
    59  iterator.inc_w();
    60  }
    61  }
    62  if (h < InputIterator::Iterations::kH - 1) {
    63  iterator.inc_h();
    64  }
    65  }
    66  if (d < InputIterator::Iterations::kD - 1) {
    67  iterator.inc_d();
    68  }
    69  }
    70  iterator.inc_advance();
    71 }
    72 
    74 template <typename InputIterator, typename Fragment>
    75 CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment) {
    76  typename InputIterator::FragmentIterator frag_iterator(fragment);
    77  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
    78  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
    79  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
    80  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
    81  int const offset =
    83  d, h, w, c);
    84 
    85  FragmentLoad<InputIterator::kIteratorFragment,
    86  InputIterator::Tile::kC,
    87  typename InputIterator::Scalar,
    88  InputIterator::kMemorySpace,
    89  typename InputIterator::FragmentElement,
    90  InputIterator::Tile::kW>::load(frag_iterator.at(d, h, w, c),
    91  iterator.data(),
    92  offset);
    93  }
    94  }
    95  }
    96  }
    97 }
    98 
    100 template <typename InputIterator, typename Fragment>
    101 CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d) {
    102  typename InputIterator::FragmentIterator frag_iterator(fragment);
    103  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
    104  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
    105  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
    106  int const offset =
    108  d, h, w, c);
    109 
    110  FragmentLoad<InputIterator::kIteratorFragment,
    111  InputIterator::Tile::kC,
    112  typename InputIterator::Scalar,
    113  InputIterator::kMemorySpace,
    114  typename InputIterator::FragmentElement,
    115  InputIterator::Tile::kW>::load(frag_iterator.at(0, h, w, c),
    116  iterator.data(),
    117  offset);
    118  }
    119  }
    120  }
    121 }
    122 
    124 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
    126  Fragment &fragment,
    127  typename InputIterator::Index offset,
    128  ConstPredicateAdapter predicate_adapter) {
    129  for (int d = 0; d < InputIterator::Iterations::kD; ++d, iterator.inc_d()) {
    130  for (int h = 0; h < InputIterator::Iterations::kH; ++h, iterator.inc_h()) {
    131  for (int w = 0; w < InputIterator::Iterations::kW; ++w, iterator.inc_w()) {
    132  if (predicate_adapter.at(d, h, w, 0)) {
    133  int idx = InputIterator::Tile::kC *
    134  (w + InputIterator::Iterations::kW * (h + InputIterator::Iterations::kH * d));
    135 
    137  load(reinterpret_cast<typename InputIterator::AccessType &>(fragment[idx]),
    138  iterator.data(),
    139  offset);
    140  }
    141  }
    142  }
    143  }
    144 }
    145 
    147 template <typename InputIterator, typename Fragment>
    149  Fragment &fragment,
    150  typename InputIterator::Index offset = 0) {
    152  iterator_load_post_increment(iterator, fragment, offset, pred);
    153 }
    154 
    156 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
    158  Fragment &fragment,
    159  ConstPredicateAdapter pred_it) {
    160  iterator_load_post_increment(iterator, fragment, 0, pred_it);
    161 }
    162 
    163 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
    164 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &_iterator,
    165  Fragment &fragment,
    166  typename InputIterator::Index offset,
    167  ConstPredicateAdapter predicate_adapter) {
    168  InputIterator iterator(_iterator);
    169  iterator_load_post_increment(iterator, fragment, offset, predicate_adapter);
    170 }
    171 
    173 template <typename InputIterator, typename Fragment>
    174 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator,
    175  Fragment &fragment,
    176  typename InputIterator::Index offset = 0) {
    178  iterator_load(iterator, fragment, offset, pred);
    179 }
    180 
    182 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
    183 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator,
    184  Fragment &fragment,
    185  ConstPredicateAdapter pred_it) {
    186  iterator_load(iterator, fragment, 0, pred_it);
    187 }
    188 
    190 
    192 template <typename OutputIterator, typename Fragment>
    193 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) {
    194  typename OutputIterator::FragmentIterator frag_iterator(fragment);
    195  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
    196  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
    197  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
    198  if (iterator.valid(d, h, w, 0)) {
    199  int const offset =
    201  d, h, w, 0);
    202 
    203  Store<typename Fragment::Element,
    204  OutputIterator::Tile::kC,
    205  OutputIterator::kMemorySpace>::
    206  store(reinterpret_cast<typename OutputIterator::AccessType &>(
    207  frag_iterator.at(d, h, w, 0)),
    208  iterator.data(),
    209  offset);
    210  }
    211  if (w < OutputIterator::Iterations::kW - 1) {
    212  iterator.inc_w();
    213  }
    214  }
    215  if (h < OutputIterator::Iterations::kH - 1) {
    216  iterator.inc_h();
    217  }
    218  }
    219  if (d < OutputIterator::Iterations::kD - 1) {
    220  iterator.inc_d();
    221  }
    222  }
    223  iterator.inc_advance();
    224 }
    225 
    227 template <typename OutputIterator, typename Fragment>
    228 CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment) {
    229  typename OutputIterator::FragmentConstIterator frag_iterator(fragment);
    230  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
    231  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
    232  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
    233  for (int c = 0; c < OutputIterator::Iterations::kC; ++c) {
    234  int const offset =
    236  d, h, w, c);
    237 
    238  FragmentStore<OutputIterator::kIteratorFragment,
    239  OutputIterator::Tile::kC,
    240  typename OutputIterator::Scalar,
    241  OutputIterator::kMemorySpace,
    242  typename OutputIterator::FragmentElement,
    243  OutputIterator::Tile::kW>::store(frag_iterator.at(d, h, w, c),
    244  iterator.data(),
    245  offset);
    246  }
    247  }
    248  }
    249  }
    250 }
    251 
    253 
    255 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
    257  Fragment const &fragment,
    258  typename OutputIterator::Index offset,
    259  ConstPredicateAdapter predicate_adapter) {
    260  for (int d = 0; d < OutputIterator::Iterations::kD; ++d, iterator.inc_d()) {
    261  for (int h = 0; h < OutputIterator::Iterations::kH; ++h, iterator.inc_h()) {
    262  for (int w = 0; w < OutputIterator::Iterations::kW; ++w, iterator.inc_w()) {
    263  if (predicate_adapter.at(d, h, w, 0)) {
    264  int idx = OutputIterator::Tile::kC *
    265  (w + OutputIterator::Iterations::kW * (h + OutputIterator::Iterations::kH * d));
    266 
    267  Store<typename Fragment::Element,
    268  OutputIterator::Tile::kC,
    269  OutputIterator::kMemorySpace>::
    270  store(reinterpret_cast<typename OutputIterator::AccessType const &>(fragment[idx]),
    271  iterator.data(),
    272  offset);
    273  }
    274  }
    275  }
    276  }
    277 }
    278 
    280 template <typename OutputIterator, typename Fragment>
    282  Fragment const &fragment,
    283  typename OutputIterator::Index offset = 0) {
    285  iterator_store_post_increment(iterator, fragment, offset, pred);
    286 }
    287 
    289 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
    291  Fragment const &fragment,
    292  ConstPredicateAdapter pred_it) {
    293  iterator_store_post_increment(iterator, fragment, 0, pred_it);
    294 }
    295 
    297 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
    298 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &_iterator,
    299  Fragment const &fragment,
    300  typename OutputIterator::Index offset,
    301  ConstPredicateAdapter predicate_adapter) {
    302  OutputIterator iterator(_iterator);
    303  iterator_store_post_increment(iterator, fragment, offset, predicate_adapter);
    304 }
    305 
    307 template <typename OutputIterator, typename Fragment>
    308 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator,
    309  Fragment const &fragment,
    310  typename OutputIterator::Index offset = 0) {
    312  iterator_store(iterator, fragment, offset, pred);
    313 }
    314 
    316 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
    317 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator,
    318  Fragment const &fragment,
    319  ConstPredicateAdapter pred_it) {
    320  iterator_store(iterator, fragment, 0, pred_it);
    321 }
    322 
    324 
    325 } // namespace cutlass
    Definition: fragment_load_store.h:43
    -
    Definition: convert.h:33
    -
    CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment)
    Loads a fragment from a shared memory input iterator.
    Definition: iterator_access.h:75
    -
    CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
    Stores a fragment to an output iterator, masked by a predicate iterator.
    Definition: iterator_access.h:256
    -
    Defines accessors for loading and storing fragments to memory efficiently.
    -
    static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:59
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include "cutlass/load_store.h"
    32 #include "cutlass/shape.h"
    33 
    34 namespace cutlass {
    35 
    37 // Used by convolution
    38 template <typename InputIterator, typename Fragment>
    39 CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) {
    40  typename InputIterator::FragmentIterator frag_iterator(fragment);
    41  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
    42  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
    43  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
    44  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
    45  if (iterator.valid(d, h, w, c)) {
    46  iterator.load_element(reinterpret_cast<typename InputIterator::AccessType &>(
    47  frag_iterator.at(d, h, w, c)),
    48  d,
    49  h,
    50  w,
    51  c);
    52  }
    53  }
    54  if (w < InputIterator::Iterations::kW - 1) {
    55  iterator.inc_w();
    56  }
    57  }
    58  if (h < InputIterator::Iterations::kH - 1) {
    59  iterator.inc_h();
    60  }
    61  }
    62  if (d < InputIterator::Iterations::kD - 1) {
    63  iterator.inc_d();
    64  }
    65  }
    66  iterator.inc_advance();
    67 }
    68 
    69 template <typename OutputIterator, typename Fragment>
    70 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) {
    71  typename OutputIterator::FragmentIterator frag_iterator(fragment);
    72  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
    73  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
    74  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
    75  for (int c = 0; c < OutputIterator::Iterations::kC; ++c) {
    76  if (iterator.valid(d, h, w, c)) {
    77  iterator.store_element(reinterpret_cast<typename OutputIterator::AccessType &>(
    78  frag_iterator.at(d, h, w, c)),
    79  d,
    80  h,
    81  w,
    82  c);
    83  }
    84  }
    85  if (w < OutputIterator::Iterations::kW - 1) {
    86  iterator.inc_w();
    87  }
    88  }
    89  if (h < OutputIterator::Iterations::kH - 1) {
    90  iterator.inc_h();
    91  }
    92  }
    93  if (d < OutputIterator::Iterations::kD - 1) {
    94  iterator.inc_d();
    95  }
    96  }
    97  iterator.inc_advance();
    98 }
    100 
    101 } // namespace cutlass
    Definition: convert.h:33
    A template defining Fragment Concept.
    Definition: fragment.h:99
    -
    Definition: load_store.h:131
    Defines container classes and iterators for managing a statically sized vector of boolean predicates...
    -
    static CUTLASS_DEVICE int get(int d, int h, int w, int c)
    Definition: shape.h:211
    -
    CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
    Loads a fragment from an input iterator, masked by a predicate iterator.
    Definition: iterator_access.h:125
    Defines abstractions for efficiently loading and storing vectors to memory.
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    -
    CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment)
    Stores a fragment to a shared memory output iterator.
    Definition: iterator_access.h:228
    -
    Element_ Element
    The element.
    Definition: fragment.h:108
    -
    Always returns true predicate.
    Definition: predicate_vector.h:426
    -
    CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
    Stores a fragment to an output iterator.
    Definition: iterator_access.h:193
    -
    Definition: fragment_load_store.h:91
    -
    CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
    Loads a fragment from an input iterator.
    Definition: iterator_access.h:41
    +
    CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
    Definition: iterator_access.h:70
    +
    CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
    Definition: iterator_access.h:39
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    diff --git a/docs/kernel__launch_8h.html b/docs/kernel__launch_8h.html new file mode 100644 index 000000000..192d541b3 --- /dev/null +++ b/docs/kernel__launch_8h.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: kernel_launch.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    kernel_launch.h File Reference
    +
    +
    + +

    Defines structures and helpers to launch CUDA kernels within CUTLASS. +More...

    +
    #include "cutlass/cutlass.h"
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    struct  cutlass::KernelLaunchConfiguration
     Structure containing the basic launch configuration of a CUDA kernel. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/kernel__launch_8h_source.html b/docs/kernel__launch_8h_source.html new file mode 100644 index 000000000..52c7a5e07 --- /dev/null +++ b/docs/kernel__launch_8h_source.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: kernel_launch.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    kernel_launch.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include "cutlass/cutlass.h"
    32 
    33 namespace cutlass {
    34 
    36 
    39 
    41  dim3 grid;
    42 
    44  dim3 block;
    45 
    47  size_t dynamic_smem;
    48 
    49  //
    50  // Methods
    51  //
    52 
    56  dim3 _grid = dim3(1,1,1),
    57  dim3 _block = dim3(1,1,1),
    58  size_t _dynamic_smem = 0
    59  ):
    60  grid(_grid),
    61  block(_block),
    62  dynamic_smem(_dynamic_smem) { }
    63 };
    64 
    66 
    67 } // namespace cutlass
    CUTLASS_HOST_DEVICE KernelLaunchConfiguration(dim3 _grid=dim3(1, 1, 1), dim3 _block=dim3(1, 1, 1), size_t _dynamic_smem=0)
    Constructs a KernellaunchConfiguration object.
    Definition: kernel_launch.h:55
    +
    Definition: convert.h:33
    +
    Structure containing the basic launch configuration of a CUDA kernel.
    Definition: kernel_launch.h:38
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    size_t dynamic_smem
    Bytes of dynamically allocated SMEM in addition to static SMEM.
    Definition: kernel_launch.h:47
    +
    dim3 block
    CUDA threablock dimensions.
    Definition: kernel_launch.h:44
    +
    dim3 grid
    CUDA grid dimensions.
    Definition: kernel_launch.h:41
    +
    Basic include for CUTLASS macros.
    +
    + + + + diff --git a/docs/linear__scaling_8h.html b/docs/linear__scaling_8h.html index 060be3aa3..132c09d48 100644 --- a/docs/linear__scaling_8h.html +++ b/docs/linear__scaling_8h.html @@ -74,7 +74,8 @@ $(function() {
    linear_scaling.h File Reference
    @@ -82,7 +83,7 @@ $(function() {

    Implements the BLAS linear scaling function alpha*AB + beta*C. More...

    -
    #include <cutlass/fragment_multiply_add.h>
    +

    Go to the source code of this file.

    @@ -101,11 +102,19 @@ Namespaces +
     
     cutlass::gemm
     
    + + + + + +

    +Functions

    template<typename T >
    CUTLASS_DEVICE bool cutlass::gemm::is_zero (T x)
     
    CUTLASS_DEVICE bool cutlass::gemm::is_zero (half x)
     
    diff --git a/docs/linear__scaling_8h_source.html b/docs/linear__scaling_8h_source.html index d9817ed09..b00e58598 100644 --- a/docs/linear__scaling_8h_source.html +++ b/docs/linear__scaling_8h_source.html @@ -76,25 +76,33 @@ $(function() {
    linear_scaling.h
    -Go to the documentation of this file.
    1 
    2 /***************************************************************************************************
    3  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without modification, are permitted
    6  * provided that the following conditions are met:
    7  * * Redistributions of source code must retain the above copyright notice, this list of
    8  * conditions and the following disclaimer.
    9  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    10  * conditions and the following disclaimer in the documentation and/or other materials
    11  * provided with the distribution.
    12  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    13  * to endorse or promote products derived from this software without specific prior written
    14  * permission.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    18  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    20  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    21  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    22  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    24  *
    25  **************************************************************************************************/
    29 #pragma once
    30 
    32 
    33 namespace cutlass {
    34 namespace gemm {
    35 
    37 
    39 template <typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_> >
    40 struct LinearScaling {
    41  // The scalar.
    42  typedef Scalar_ Scalar;
    43  // The adapater.
    44  typedef FragmentMultiplyAdd_ FragmentMultiplyAdd;
    45 
    47  struct Params {
    50 
    52  template <typename GemmDesc_>
    53  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
    54  alpha = desc.alpha;
    55  beta = desc.beta;
    56  return 0;
    57  }
    58  };
    59 
    61  CUTLASS_DEVICE LinearScaling(Params const& params) : alpha(params.alpha), beta(params.beta) {}
    62 
    64  template <typename Fragment_>
    65  CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_& output) {
    67  mad.multiply(alpha, accum, output);
    68  }
    69 
    71  template <typename Fragment_>
    72  CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_ const& old, Fragment_& output) {
    74  Fragment_ tmp;
    75  mad.multiply(beta, old, tmp);
    76  mad.multiply_add(alpha, accum, tmp, output);
    77  }
    78 
    81 };
    82 
    84 
    85 } // namespace gemm
    86 } // namespace cutlass
    Definition: convert.h:33
    -
    Scalar alpha
    The alpha/beta scaling params.
    Definition: linear_scaling.h:49
    -
    Scalar alpha
    The alpha/beta scaling factors.
    Definition: linear_scaling.h:80
    -
    CUTLASS_DEVICE LinearScaling(Params const &params)
    Ctor.
    Definition: linear_scaling.h:61
    -
    CUTLASS_DEVICE void evaluate(Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)
    Evaluate the functor.
    Definition: linear_scaling.h:72
    -
    Scalar beta
    Definition: linear_scaling.h:49
    -
    CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
    Initialize the parameters.
    Definition: linear_scaling.h:53
    -
    Scalar beta
    Definition: linear_scaling.h:80
    +Go to the documentation of this file.
    1 
    2 /***************************************************************************************************
    3  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without modification, are permitted
    6  * provided that the following conditions are met:
    7  * * Redistributions of source code must retain the above copyright notice, this list of
    8  * conditions and the following disclaimer.
    9  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    10  * conditions and the following disclaimer in the documentation and/or other materials
    11  * provided with the distribution.
    12  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    13  * to endorse or promote products derived from this software without specific prior written
    14  * permission.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    18  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    20  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    21  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    22  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    24  *
    25  **************************************************************************************************/
    29 #pragma once
    30 
    32 
    33 namespace cutlass {
    34 namespace gemm {
    35 
    37 
    38 template <typename T>
    39 CUTLASS_DEVICE bool is_zero(T x) {
    40  return x == T(0);
    41 }
    42 
    43 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
    44 CUTLASS_DEVICE bool is_zero(half x) { return reinterpret_cast<int16_t&>(x) == int16_t(0); }
    45 #endif
    46 
    48 
    50 template <typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_> >
    51 struct LinearScaling {
    52  // The scalar.
    53  typedef Scalar_ Scalar;
    54  // The accumulator Type
    55  typedef typename FragmentMultiplyAdd_::ScalarAccum ScalarAccum;
    56  // The adapater.
    57  typedef FragmentMultiplyAdd_ FragmentMultiplyAdd;
    58 
    60  struct Params {
    63 
    64  //
    65  // Methods
    66  //
    67 
    68  // Constructor
    70  Params(Scalar _alpha = 0, Scalar _beta = 0) : alpha(_alpha), beta(_beta) {}
    71 
    74  alpha = _alpha;
    75  beta = _beta;
    76  return 0;
    77  }
    78 
    80  template <typename GemmDesc_>
    81  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
    82  alpha = desc.alpha;
    83  beta = desc.beta;
    84  return 0;
    85  }
    86  };
    87 
    88  //
    89  // Data members
    90  //
    91 
    93 
    94  //
    95  // Methods
    96  //
    97 
    99  CUTLASS_DEVICE LinearScaling() { }
    100 
    102  CUTLASS_DEVICE LinearScaling(Params const& _params) : params(_params) {}
    103 
    107  CUTLASS_DEVICE
    108  bool source_required() const {
    109  return !is_zero(params.beta);
    110  }
    111 
    113  template <typename FragmentA_, typename FragmentB_>
    114  CUTLASS_DEVICE void evaluate(FragmentA_ const& accum, FragmentB_& output) {
    116  mad.multiply(params.alpha, accum, output);
    117 
    118  }
    119 
    121  template <typename ScalarAccum, typename ScalarOutput, int size>
    122  CUTLASS_DEVICE void evaluate(ScalarAccum const *accum, ScalarOutput *output) {
    123  Fragment<ScalarAccum, size> FragAccum;
    124  Fragment<ScalarOutput, size> FragOutput;
    125 #pragma unroll
    126  for (int i = 0; i < size; i++) {
    127  FragAccum[i] = accum[i];
    128  FragOutput[i] = output[i];
    129  }
    130  evaluate(FragAccum, FragOutput);
    131 #pragma unroll
    132  for (int i = 0; i < size; i++) {
    133  output[i] = FragOutput[i];
    134  }
    135  }
    136 
    138  template <typename FragmentA_, typename FragmentB_>
    139  CUTLASS_DEVICE void evaluate(FragmentA_ const& accum, FragmentB_ const& old, FragmentB_& output) {
    141  FragmentB_ tmp;
    142  mad.multiply(params.beta, old, tmp);
    143  mad.multiply_add(params.alpha, accum, tmp, output);
    144  }
    145 
    147  template <typename ScalarAccum, typename ScalarOutput, int size>
    148  CUTLASS_DEVICE void evaluate(ScalarAccum const *accum, ScalarOutput const *old, ScalarOutput *output) {
    149  Fragment<ScalarAccum, size> FragAccum;
    150  Fragment<ScalarOutput, size> FragOutput;
    152 #pragma unroll
    153  for (int i = 0; i < size; i++) {
    154  FragAccum[i] = accum[i];
    155  FragOutput[i] = output[i];
    156  FragOld[i] = old[i];
    157  }
    158  evaluate(FragAccum, FragOld, FragOutput);
    159 #pragma unroll
    160  for (int i = 0; i < size; i++) {
    161  output[i] = FragOutput[i];
    162  }
    163  }
    164 };
    165 
    167 
    168 } // namespace gemm
    169 } // namespace cutlass
    CUTLASS_HOST_DEVICE int initialize(Scalar _alpha, Scalar _beta)
    Initialize the parameters.
    Definition: linear_scaling.h:73
    +
    Definition: convert.h:33
    +
    Scalar alpha
    The alpha/beta scaling params.
    Definition: linear_scaling.h:62
    +
    CUTLASS_DEVICE bool source_required() const
    Definition: linear_scaling.h:108
    +
    CUTLASS_DEVICE void evaluate(ScalarAccum const *accum, ScalarOutput *output)
    Evaluate the functor, without using fragment in the API.
    Definition: linear_scaling.h:122
    +
    CUTLASS_DEVICE void evaluate(FragmentA_ const &accum, FragmentB_ const &old, FragmentB_ &output)
    Evaluate the functor.
    Definition: linear_scaling.h:139
    +
    CUTLASS_DEVICE void evaluate(FragmentA_ const &accum, FragmentB_ &output)
    Evaluate the functor.
    Definition: linear_scaling.h:114
    +
    Scalar beta
    Definition: linear_scaling.h:62
    +
    A template defining Fragment Concept.
    Definition: fragment.h:99
    +
    Params params
    Definition: linear_scaling.h:92
    +
    FragmentMultiplyAdd_::ScalarAccum ScalarAccum
    Definition: linear_scaling.h:55
    +
    CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
    Initialize the parameters.
    Definition: linear_scaling.h:81
    Defines multiply-add operations on fragments within a thread.
    -
    FragmentMultiplyAdd_ FragmentMultiplyAdd
    Definition: linear_scaling.h:44
    +
    FragmentMultiplyAdd_ FragmentMultiplyAdd
    Definition: linear_scaling.h:57
    +
    CUTLASS_DEVICE LinearScaling()
    Ctor.
    Definition: linear_scaling.h:99
    +
    CUTLASS_DEVICE bool is_zero(T x)
    Definition: linear_scaling.h:39
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    -
    CUTLASS_DEVICE void evaluate(Fragment_ const &accum, Fragment_ &output)
    Evaluate the functor.
    Definition: linear_scaling.h:65
    -
    The parameters.
    Definition: linear_scaling.h:47
    -
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:40
    -
    Scalar_ Scalar
    Definition: linear_scaling.h:42
    +
    CUTLASS_DEVICE LinearScaling(Params const &_params)
    Ctor.
    Definition: linear_scaling.h:102
    +
    The parameters.
    Definition: linear_scaling.h:60
    +
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:51
    +
    Scalar_ Scalar
    Definition: linear_scaling.h:53
    +
    CUTLASS_DEVICE void evaluate(ScalarAccum const *accum, ScalarOutput const *old, ScalarOutput *output)
    Evaluate the functor, without using fragment in the API.
    Definition: linear_scaling.h:148
    +
    CUTLASS_HOST_DEVICE Params(Scalar _alpha=0, Scalar _beta=0)
    Definition: linear_scaling.h:70
    diff --git a/docs/linear__scaling__device__ptr_8h.html b/docs/linear__scaling__device__ptr_8h.html new file mode 100644 index 000000000..ad2add1db --- /dev/null +++ b/docs/linear__scaling__device__ptr_8h.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: linear_scaling_device_ptr.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    linear_scaling_device_ptr.h File Reference
    +
    +
    + +

    Implements the BLAS linear scaling function alpha*AB + beta*C. +More...

    + +

    Go to the source code of this file.

    + + + + + + + +

    +Classes

    struct  cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >
     
    class  cutlass::gemm::LinearScalingDevicePtr< Scalar_, FragmentMultiplyAdd_ >::Params
     The parameters. More...
     
    + + + + + +

    +Namespaces

     cutlass
     
     cutlass::gemm
     
    +
    + + + + diff --git a/docs/linear__scaling__device__ptr_8h_source.html b/docs/linear__scaling__device__ptr_8h_source.html new file mode 100644 index 000000000..2fae588f7 --- /dev/null +++ b/docs/linear__scaling__device__ptr_8h_source.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: linear_scaling_device_ptr.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    linear_scaling_device_ptr.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include "cutlass/cutlass.h"
    33 
    34 namespace cutlass {
    35 
    37 
    38 namespace gemm {
    39 
    41 
    45 template <typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_, Scalar_> >
    46 struct LinearScalingDevicePtr : public LinearScaling<Scalar_, FragmentMultiplyAdd_> {
    47 
    50 
    51  // The scalar.
    52  typedef typename Base::Scalar Scalar;
    53 
    55  class Params {
    56  private:
    59 
    62 
    63  public:
    64  //
    65  // Methods
    66  //
    67 
    68  // Constructor
    70  Params() {}
    71 
    72  // Constructor
    75  Scalar alpha,
    76  Scalar beta
    77  ):
    78  alpha_(alpha),
    79  beta_(beta) {}
    80 
    81  // Constructor
    84  Scalar const *alpha_ptr,
    85  Scalar const *beta_ptr
    86  ):
    87  alpha_(alpha_ptr),
    88  beta_(alpha_ptr) {}
    89 
    92  Scalar alpha,
    93  Scalar beta) {
    94 
    95  alpha_ = alpha;
    96  beta_ = beta;
    97 
    98  return 0;
    99  }
    100 
    103  Scalar const *alpha,
    104  Scalar const *beta) {
    105 
    106  alpha_ = alpha;
    107  beta_= beta;
    108 
    109  return 0;
    110  }
    111 
    113  template <typename GemmDesc_>
    114  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
    115 
    116  alpha_ = desc.alpha;
    117  beta_ = desc.beta;
    118 
    119  return 0;
    120  }
    121 
    124  Scalar alpha() const {
    125  return alpha_;
    126  }
    127 
    130  Scalar beta() const {
    131  return beta_;
    132  }
    133  };
    134 
    135  //
    136  // Methods
    137  //
    138 
    141  this->params.alpha = _params.alpha();
    142  this->params.beta = _params.beta();
    143  }
    144 };
    145 
    147 
    148 } // namespace gemm
    149 } // namespace cutlass
    CUTLASS_HOST_DEVICE int initialize(Scalar const *alpha, Scalar const *beta)
    Initialize the parameters.
    Definition: linear_scaling_device_ptr.h:102
    +
    The parameters.
    Definition: linear_scaling_device_ptr.h:55
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE Params(Scalar const *alpha_ptr, Scalar const *beta_ptr)
    Definition: linear_scaling_device_ptr.h:83
    +
    Implements the BLAS linear scaling function alpha*AB + beta*C.
    +
    Implements the BLAS linear scaling function alpha*AB + beta*C.
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar alpha, Scalar beta)
    Initialize the parameters.
    Definition: linear_scaling_device_ptr.h:91
    +
    Params params
    Definition: linear_scaling.h:92
    +
    LinearScaling< Scalar_, FragmentMultiplyAdd_ > Base
    Linear Scaling class used.
    Definition: linear_scaling_device_ptr.h:49
    +
    CUTLASS_HOST_DEVICE Params()
    Definition: linear_scaling_device_ptr.h:70
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    CUTLASS_HOST_DEVICE Params(Scalar alpha, Scalar beta)
    Definition: linear_scaling_device_ptr.h:74
    +
    CUTLASS_HOST_DEVICE Scalar beta() const
    Gets the beta scalar.
    Definition: linear_scaling_device_ptr.h:130
    +
    CUTLASS_HOST_DEVICE LinearScalingDevicePtr(Params const &_params)
    Ctor.
    Definition: linear_scaling_device_ptr.h:140
    +
    CUTLASS_HOST_DEVICE Scalar alpha() const
    Gets the alpha scalar.
    Definition: linear_scaling_device_ptr.h:124
    +
    Definition: linear_scaling_device_ptr.h:46
    +
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:51
    +
    Scalar_ Scalar
    Definition: linear_scaling.h:53
    +
    Base::Scalar Scalar
    Definition: linear_scaling_device_ptr.h:52
    +
    Basic include for CUTLASS macros.
    +
    CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
    Initialize the parameters.
    Definition: linear_scaling_device_ptr.h:114
    + +
    + + + + diff --git a/docs/load__store_8h.html b/docs/load__store_8h.html index b23ec3cbf..30a4e7334 100644 --- a/docs/load__store_8h.html +++ b/docs/load__store_8h.html @@ -82,7 +82,7 @@ $(function() {

    Defines abstractions for efficiently loading and storing vectors to memory. More...

    -
    #include <cutlass/vector.h>
    +
    #include "cutlass/vector.h"

    Go to the source code of this file.

    @@ -91,25 +91,43 @@ Classes - + + - + - + + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + +
    struct  cutlass::MemorySpace
     Enum to specify which memory space data resides in. More...
     
    struct  cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >
    struct  cutlass::FragmentElementType
     Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
     
    struct  cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >
    struct  cutlass::Load< Scalar_, kAccessSize, Memory_, kFragmentElementType, FragmentElement_, kStride, size >
     
    struct  cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >
    struct  cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 >
     Partial specialization for 16b loads. More...
     
    struct  cutlass::Load< double, 2, Memory_, true, 16 >
    struct  cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 >
     
    struct  cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >
    struct  cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 >
     
    struct  cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >
    struct  cutlass::Load< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 >
     
    struct  cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >
    struct  cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 >
     
    struct  cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >
    struct  cutlass::Store< Scalar_, kAccessSize, Memory_, kFragmentElementType, FragmentElement_, kStride, size >
     
    struct  cutlass::Store< double, 2, Memory_, true, 16 >
    struct  cutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 >
     
    struct  cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >
    struct  cutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 >
     
    struct  cutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 >
     
    struct  cutlass::Store< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 >
     
    struct  cutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 >
     
    struct  cutlass::Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  cutlass::Load< Vector< bin1_t, 32 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  cutlass::Load< Vector< int4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  cutlass::Load< Vector< uint4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  cutlass::Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    diff --git a/docs/load__store_8h_source.html b/docs/load__store_8h_source.html index e421cbf27..9fc9c8668 100644 --- a/docs/load__store_8h_source.html +++ b/docs/load__store_8h_source.html @@ -76,41 +76,64 @@ $(function() {
    load_store.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/vector.h>
    31 
    32 namespace cutlass {
    33 
    35 
    39 struct MemorySpace {
    40  enum Kind {
    41  kGeneric, // Data accessed through pointer dereferencing
    42  kShared, // Data resides in shared memory
    43  kGlobal // Data resides in global memory
    44  };
    45 };
    46 
    48 
    49 template <typename Scalar_,
    50  int Lanes_,
    51  MemorySpace::Kind Memory_,
    52  bool = (Lanes_ > 1),
    53  size_t = (sizeof(Scalar_) * Lanes_)>
    54 struct Load {
    57 
    59  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    60  dst = reinterpret_cast<AccessType const*>(&pointer[offset])[0];
    61  }
    62 };
    63 
    65 
    66 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
    67 struct Load<Scalar_, Lanes_, Memory_, true, 4> {
    70 
    72  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    73  dst.registers[0] = reinterpret_cast<uint32_t const*>(&pointer[offset])[0];
    74  }
    75 };
    76 
    78 
    79 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
    80 struct Load<Scalar_, Lanes_, Memory_, true, 8> {
    83 
    85  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    86  uint2 tmp = reinterpret_cast<uint2 const*>(&pointer[offset])[0];
    87  dst.registers[0] = tmp.x;
    88  dst.registers[1] = tmp.y;
    89  }
    90 };
    91 
    93 
    94 template <MemorySpace::Kind Memory_>
    95 struct Load<double, 2, Memory_, true, 16> {
    98 
    100  static CUTLASS_DEVICE void load(AccessType& dst, double const* pointer, int offset) {
    101  double2 tmp = reinterpret_cast<double2 const*>(&pointer[offset])[0];
    102  dst[0] = tmp.x;
    103  dst[1] = tmp.y;
    104  }
    105 };
    106 
    108 
    109 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
    110 struct Load<Scalar_, Lanes_, Memory_, true, 16> {
    113 
    115  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    116  uint4 tmp = reinterpret_cast<uint4 const*>(&pointer[offset])[0];
    117  dst.registers[0] = tmp.x;
    118  dst.registers[1] = tmp.y;
    119  dst.registers[2] = tmp.z;
    120  dst.registers[3] = tmp.w;
    121  }
    122 };
    123 
    125 
    126 template <typename Scalar_,
    127  int Lanes_,
    128  MemorySpace::Kind Memory_,
    129  bool = (Lanes_ > 1),
    130  size_t = (sizeof(Scalar_) * Lanes_)>
    131 struct Store {
    134 
    136  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    137  pointer[offset] = src;
    138  }
    139 };
    140 
    142 
    143 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
    144 struct Store<Scalar_, Lanes_, Memory_, true, 4> {
    147 
    149  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    150  uint32_t* addr = reinterpret_cast<uint32_t*>(&pointer[offset]);
    151  addr[0] = src.registers[0];
    152  }
    153 };
    154 
    156 
    157 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
    158 struct Store<Scalar_, Lanes_, Memory_, true, 8> {
    161 
    163  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    164  uint2* addr = reinterpret_cast<uint2*>(&pointer[offset]);
    165  addr[0] = make_uint2(src.registers[0], src.registers[1]);
    166  }
    167 };
    168 
    170 
    171 template <MemorySpace::Kind Memory_>
    172 struct Store<double, 2, Memory_, true, 16> {
    175 
    177  static CUTLASS_DEVICE void store(AccessType const& src, double* pointer, int offset) {
    178  double2* addr = reinterpret_cast<double2*>(&pointer[offset]);
    179  addr[0] = make_double2(src[0], src[1]);
    180  }
    181 };
    182 
    184 
    185 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
    186 struct Store<Scalar_, Lanes_, Memory_, true, 16> {
    189 
    191  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    192  uint4* addr = reinterpret_cast<uint4*>(&pointer[offset]);
    193  addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]);
    194  }
    195 };
    196 
    198 
    199 } // namespace cutlass
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:188
    -
    Definition: load_store.h:42
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include "cutlass/vector.h"
    31 namespace cutlass {
    32 
    34 
    38 struct MemorySpace {
    39  enum Kind {
    40  kGeneric, // Data accessed through pointer dereferencing
    41  kShared, // Data resides in shared memory
    42  kGlobal // Data resides in global memory
    43  };
    44 };
    45 
    49 };
    50 
    52 
    53 template <typename Scalar_,
    54  int kAccessSize,
    55  MemorySpace::Kind Memory_,
    57  typename FragmentElement_ = Scalar_,
    58  int kStride = 1,
    59  size_t size = (sizeof(Scalar_) * kAccessSize)>
    60 struct Load {
    63 
    65  static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    66  dst = *reinterpret_cast<AccessType const*>(pointer + offset);
    67  }
    68 
    69 };
    70 
    72 
    74 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_>
    75 struct Load<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2> {
    78 
    80  static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    81  reinterpret_cast<uint16_t&>(dst) = reinterpret_cast<uint16_t const*>(&pointer[offset])[0];
    82  }
    83 };
    84 
    86 
    87 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_, int kStride>
    88 struct Load<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4> {
    91 
    93  static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    94  dst.registers[0] = reinterpret_cast<uint32_t const*>(&pointer[offset])[0];
    95  }
    96 
    97 };
    98 
    100 
    101 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_, int kStride>
    102 struct Load<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8> {
    105 
    107  static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    108  uint2 tmp = reinterpret_cast<uint2 const*>(&pointer[offset])[0];
    109  dst.registers[0] = tmp.x;
    110  dst.registers[1] = tmp.y;
    111  }
    112 };
    113 
    115 
    116 template <MemorySpace::Kind Memory_, int kStride>
    117 struct Load<double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16> {
    120 
    122  static CUTLASS_HOST_DEVICE void load(AccessType& dst, double const* pointer, int offset) {
    123  double2 tmp = reinterpret_cast<double2 const*>(&pointer[offset])[0];
    124  dst[0] = tmp.x;
    125  dst[1] = tmp.y;
    126  }
    127 };
    128 
    130 
    131 #if defined(__CUDACC_VERSION_MAJOR) && __CUDACC_VERSION_MAJOR < 10
    132 // WAR bug in NVCC where the upper and lower half of the register end up being the same
    133 template <MemorySpace::Kind Memory_, int kStride>
    134 struct Load<half, 8, Memory_, FragmentElementType::kScalar, half, kStride, 16> {
    136  typedef typename Vectorize<half, 8>::Type AccessType;
    137 
    139  static CUTLASS_HOST_DEVICE void load(AccessType& dst, half const* pointer, int offset) {
    140  int2 tmp = reinterpret_cast<int2 const*>(&pointer[offset])[0];
    141  dst.registers[0] = tmp.x;
    142  dst.registers[1] = tmp.y;
    143 
    144  tmp = reinterpret_cast<int2 const*>(&pointer[offset + 4])[0];
    145  dst.registers[2] = tmp.x;
    146  dst.registers[3] = tmp.y;
    147  }
    148 };
    149 
    150 #endif
    151 
    153 
    154 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_, int kStride>
    155 struct Load<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16> {
    158 
    160  static CUTLASS_HOST_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
    161  uint4 tmp = reinterpret_cast<uint4 const*>(&pointer[offset])[0];
    162  dst.registers[0] = tmp.x;
    163  dst.registers[1] = tmp.y;
    164  dst.registers[2] = tmp.z;
    165  dst.registers[3] = tmp.w;
    166  }
    167 };
    168 
    170 
    171 template <typename Scalar_,
    172  int kAccessSize,
    173  MemorySpace::Kind Memory_,
    175  typename FragmentElement_ = Scalar_,
    176  int kStride = 1,
    177  size_t size = (sizeof(Scalar_) * kAccessSize)>
    178 struct Store {
    181 
    183  static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    184  pointer[offset] = *reinterpret_cast<Scalar_ const*>(&src);
    185  }
    186 };
    187 
    189 
    190 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_>
    191 struct Store<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2> {
    194 
    196  static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    197  uint16_t* addr = reinterpret_cast<uint16_t*>(&pointer[offset]);
    198  addr[0] = reinterpret_cast<uint16_t const&>(src);
    199  }
    200 };
    201 
    203 
    204 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_, int kStride>
    205 struct Store<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4> {
    208 
    210  static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    211  uint32_t* addr = reinterpret_cast<uint32_t*>(&pointer[offset]);
    212  addr[0] = src.registers[0];
    213  }
    214 };
    215 
    217 
    218 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_, int kStride>
    219 struct Store<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8> {
    222 
    224  static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    225  uint2* addr = reinterpret_cast<uint2*>(&pointer[offset]);
    226  addr[0] = make_uint2(src.registers[0], src.registers[1]);
    227  }
    228 };
    229 
    231 
    232 template <MemorySpace::Kind Memory_, int kStride>
    233 struct Store<double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16> {
    236 
    238  static CUTLASS_HOST_DEVICE void store(AccessType const& src, double* pointer, int offset) {
    239  double2* addr = reinterpret_cast<double2*>(&pointer[offset]);
    240  addr[0] = make_double2(src[0], src[1]);
    241  }
    242 };
    243 
    245 
    246 template <typename Scalar_, int kAccessSize, MemorySpace::Kind Memory_, int kStride>
    247 struct Store<Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16> {
    250 
    252  static CUTLASS_HOST_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
    253  uint4* addr = reinterpret_cast<uint4*>(&pointer[offset]);
    254  addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]);
    255  }
    256 };
    257 
    259 
    260 template <typename Scalar_,
    261  int kAccessSize,
    262  MemorySpace::Kind Memory_,
    263  typename FragmentElement_,
    264  int kStride,
    265  size_t size>
    266 struct Load<Scalar_,
    267  kAccessSize,
    268  Memory_,
    269  FragmentElementType::kWmmaMatrix,
    270  FragmentElement_,
    271  kStride,
    272  size> {
    274  typedef FragmentElement_ AccessType;
    275 
    277  static CUTLASS_HOST_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) {
    278  value.load(&pointer[offset], kStride);
    279  }
    280 };
    281 
    283 
    284 template <int kAccessSize,
    285  MemorySpace::Kind Memory_,
    286  typename FragmentElement_,
    287  int kStride,
    288  size_t size>
    289 struct Load<Vector<bin1_t, 32>,
    290  kAccessSize,
    291  Memory_,
    292  FragmentElementType::kWmmaMatrix,
    293  FragmentElement_,
    294  kStride,
    295  size> {
    297  typedef FragmentElement_ AccessType;
    298 
    300  static CUTLASS_HOST_DEVICE void load(AccessType& value, Vector<bin1_t, 32> const* pointer,
    301  int offset) {
    302  value.load(&pointer[offset], kStride * 32);
    303  }
    304 };
    305 
    307 
    308 template <int kAccessSize,
    309  MemorySpace::Kind Memory_,
    310  typename FragmentElement_,
    311  int kStride,
    312  size_t size>
    313 struct Load<Vector<int4_t, 8>,
    314  kAccessSize,
    315  Memory_,
    316  FragmentElementType::kWmmaMatrix,
    317  FragmentElement_,
    318  kStride,
    319  size> {
    321  typedef FragmentElement_ AccessType;
    322 
    324  static CUTLASS_HOST_DEVICE void load(AccessType& value, Vector<int4_t, 8> const* pointer,
    325  int offset) {
    326  value.load(&pointer[offset], kStride * 8);
    327  }
    328 };
    329 
    331 
    332 template <int kAccessSize,
    333  MemorySpace::Kind Memory_,
    334  typename FragmentElement_,
    335  int kStride,
    336  size_t size>
    337 struct Load<Vector<uint4_t, 8>,
    338  kAccessSize,
    339  Memory_,
    340  FragmentElementType::kWmmaMatrix,
    341  FragmentElement_,
    342  kStride,
    343  size> {
    345  typedef FragmentElement_ AccessType;
    346 
    348  static CUTLASS_HOST_DEVICE void load(AccessType& value, Vector<uint4_t, 8> const* pointer,
    349  int offset) {
    350  value.load(&pointer[offset], kStride * 8);
    351  }
    352 };
    353 
    355 template <typename Scalar_,
    356  int kAccessSize,
    357  MemorySpace::Kind Memory_,
    358  typename FragmentElement_,
    359  int kStride,
    360  size_t size>
    361 struct Store<Scalar_,
    362  kAccessSize,
    363  Memory_,
    364  FragmentElementType::kWmmaMatrix,
    365  FragmentElement_,
    366  kStride,
    367  size> {
    369  typedef FragmentElement_ AccessType;
    370 
    372  static CUTLASS_HOST_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) {
    373  value.store(&pointer[offset], kStride);
    374  }
    375 };
    376 
    378 
    379 } // namespace cutlass
    static CUTLASS_HOST_DEVICE void load(AccessType &value, Vector< bin1_t, 32 > const *pointer, int offset)
    The load function.
    Definition: load_store.h:300
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:157
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:77
    +
    static CUTLASS_HOST_DEVICE void store(AccessType const &src, double *pointer, int offset)
    The store function.
    Definition: load_store.h:238
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &value, Vector< int4_t, 8 > const *pointer, int offset)
    The load function.
    Definition: load_store.h:324
    +
    Definition: load_store.h:41
    Definition: convert.h:33
    -
    static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:163
    -
    Enum to specify which memory space data resides in.
    Definition: load_store.h:39
    -
    Definition: load_store.h:43
    -
    static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:59
    -
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:112
    -
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:146
    -
    Kind
    Definition: load_store.h:40
    -
    Definition: load_store.h:131
    -
    static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:136
    -
    uint32_t registers[kRegisters]
    The data in registers.
    Definition: vector.h:80
    -
    Vectorize< double, 2 >::Type AccessType
    The output type.
    Definition: load_store.h:174
    -
    Definition: load_store.h:41
    -
    static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The store function.
    Definition: load_store.h:72
    -
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:133
    -
    Definition: vector.h:61
    -
    static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The store function.
    Definition: load_store.h:85
    -
    Definition: load_store.h:54
    -
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:82
    + +
    Definition: numeric_types.h:39
    +
    Enum to specify which memory space data resides in.
    Definition: load_store.h:38
    +
    static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:196
    +
    static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:252
    + +
    Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix.
    Definition: load_store.h:47
    +
    Definition: load_store.h:42
    + + +
    Vectorize< double, 2 >::Type AccessType
    The output type.
    Definition: load_store.h:119
    +
    Vectorize< FragmentElement_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:180
    +
    Kind
    Definition: load_store.h:39
    +
    Definition: load_store.h:178
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:160
    +
    uint32_t registers[kRegisters]
    The data in registers.
    Definition: vector.h:81
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &value, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:277
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:193
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:104
    +
    Kind
    Definition: load_store.h:48
    +
    Definition: load_store.h:40
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:62
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:107
    +
    Definition: vector.h:62
    +
    Definition: load_store.h:60
    + +
    static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:93
    +
    Definition: load_store.h:48
    +
    Vector< Element_, kLanes_ > Type
    Definition: vector.h:271
    Defines a 1D vector of elements held in the registers of each thread.
    -
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:160
    -
    static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The store function.
    Definition: load_store.h:115
    -
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:69
    -
    static CUTLASS_DEVICE void load(AccessType &dst, double const *pointer, int offset)
    The store function.
    Definition: load_store.h:100
    -
    Vectorize< double, 2 >::Type AccessType
    The output type.
    Definition: load_store.h:97
    -
    Vectorize< Scalar_, Lanes_ >::Type AccessType
    The output type.
    Definition: load_store.h:56
    -
    static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:191
    -
    static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:149
    -
    static CUTLASS_DEVICE void store(AccessType const &src, double *pointer, int offset)
    The store function.
    Definition: load_store.h:177
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:249
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &value, Vector< uint4_t, 8 > const *pointer, int offset)
    The load function.
    Definition: load_store.h:348
    +
    Definition: numeric_types.h:43
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:90
    +
    static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:183
    +
    static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:224
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:221
    +
    Vectorize< Scalar_, kAccessSize >::Type AccessType
    The output type.
    Definition: load_store.h:207
    +
    static CUTLASS_HOST_DEVICE void store(AccessType const &value, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:372
    +
    Definition: numeric_types.h:41
    + +
    static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:210
    +
    Vectorize< double, 2 >::Type AccessType
    The output type.
    Definition: load_store.h:235
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:65
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:80
    +
    static CUTLASS_HOST_DEVICE void load(AccessType &dst, double const *pointer, int offset)
    The load function.
    Definition: load_store.h:122
    diff --git a/docs/matrix__traits_8h.html b/docs/matrix__traits_8h.html index f83c89f0d..097d5f4ae 100644 --- a/docs/matrix__traits_8h.html +++ b/docs/matrix__traits_8h.html @@ -82,17 +82,39 @@ $(function() {

    Defines properties of matrices used to denote layout and operands to GEMM kernels. More...

    - +
    #include "cutlass/coord.h"
    +

    Go to the source code of this file.

    @@ -120,7 +138,7 @@ Namespaces

    + + - + + + + + + + + + + + + + + + + + + + +

    Classes

    struct  cutlass::MatrixCoord
     
    struct  cutlass::MatrixLayout
     Describes layouts of matrices. More...
     Defines data layouts of various matrix formats usable by TensorRef and other classes. More...
     
    struct  cutlass::MatrixLayout::RowMajor
     Mapping function for row-major matrices. More...
     
    struct  cutlass::MatrixLayout::ColumnMajor
     Mapping function for column-major matrices. More...
     
    struct  cutlass::MatrixLayout::RowMajorInterleaved< Interleave >
     
    struct  cutlass::MatrixLayout::ColumnMajorInterleaved< Interleave >
     
    struct  cutlass::MatrixLayout::ContiguousLayout
     
    struct  cutlass::MatrixLayout::ColumnMajorBlockLinear< BlockRows, BlockColumns >
     
    struct  cutlass::MatrixLayout::RowMajorBlockLinear< BlockRows, BlockColumns >
     
    struct  cutlass::GemmOperand
     Gemm operand - D = A * B + C. More...
     
    struct  cutlass::MatrixTransform
     Transformation applied to matrix operands. More...
     
    @@ -102,7 +124,7 @@ Namespaces diff --git a/docs/matrix__traits_8h_source.html b/docs/matrix__traits_8h_source.html index 9f8de2dc6..2e78c5a2b 100644 --- a/docs/matrix__traits_8h_source.html +++ b/docs/matrix__traits_8h_source.html @@ -76,21 +76,88 @@ $(function() {
    matrix_traits.h
    -Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 namespace cutlass {
    31 
    33 
    35 struct MatrixLayout {
    37 };
    38 
    40 
    42 struct GemmOperand {
    43  enum Kind { kA, kB, kC, kD };
    44 };
    45 
    47 
    48 } // namespace cutlass
    Definition: convert.h:33
    -
    Definition: matrix_traits.h:43
    -
    Describes layouts of matrices.
    Definition: matrix_traits.h:35
    -
    Definition: matrix_traits.h:36
    -
    Definition: matrix_traits.h:43
    -
    Gemm operand - D = A * B + C.
    Definition: matrix_traits.h:42
    -
    Definition: matrix_traits.h:36
    -
    Kind
    Definition: matrix_traits.h:36
    -
    Kind
    Definition: matrix_traits.h:43
    -
    Definition: matrix_traits.h:43
    -
    Definition: matrix_traits.h:43
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include "cutlass/coord.h"
    31 
    32 namespace cutlass {
    33 
    35 
    38 struct MatrixCoord : public Coord<2, int> {
    39 
    41  typedef int Index;
    42 
    45 
    47  static int const kRow = 0;
    48 
    50  static int const kColumn = 1;
    51 
    52  //
    53  // Methods
    54  //
    55 
    59 
    62  MatrixCoord(Coord<2, Index> const &coord): Base(coord) { }
    63 
    67 
    70  Index const & row() const { return this->at(kRow); }
    71 
    74  Index & row() { return this->at(kRow); }
    75 
    78  Index const & column() const { return this->at(kColumn); }
    79 
    82  Index & column() { return this->at(kColumn); }
    83 
    84  //
    85  // Coord operators
    86  //
    87 
    90  MatrixCoord operator+(Base const& b) const {
    91  return MatrixCoord(Base::operator+(b));
    92  }
    93 
    96  MatrixCoord operator-(Base const& b) const {
    97  return MatrixCoord(Base::operator-(b));
    98  }
    99 
    102  MatrixCoord operator*(Base const& b) const {
    103  return MatrixCoord(Base::operator*(b));
    104  }
    105 
    108  MatrixCoord operator/(Base const& b) const {
    109  return MatrixCoord(Base::operator/(b));
    110  }
    111 
    115  Base::operator+=(b);
    116  return *this;
    117  }
    118 
    122  Base::operator-=(b);
    123  return *this;
    124  }
    125 
    129  Base::operator*=(b);
    130  return *this;
    131  }
    132 
    136  Base::operator/=(b);
    137  return *this;
    138  }
    139 };
    140 
    142 
    144 //
    145 // The following define classes satisfying the TensorRefMapFunc concept. These must support the
    146 // following operations, where func is an instance of type TensorRefMapFunc.
    147 //
    148 // Coord<TensorRefMapFunc::kStorageRank> = func(Coord<kRank>);
    149 //
    150 // Though not required to be usable by TensorRef, each of the following also define a helper
    151 // function to map the "leading dimension" to an appropriate stride vector. Implementations
    152 // following this convention should also implement the following static method:
    153 //
    154 // Coord<TensorRefMapFunc::kStorageRank> stride = TensorRefMapFunc::stride(leading_dim);
    155 //
    156 struct MatrixLayout {
    157 
    160 
    161  //
    162  // TensorRefMapFunc definitions for common layouts
    163  //
    164 
    166  struct RowMajor {
    167  static int const kStorageRank = 2;
    171  return coord;
    172  }
    173  };
    174 
    176  struct ColumnMajor {
    177  static int const kStorageRank = 2;
    181  return make_Coord(coord.column(), coord.row());
    182  }
    183  };
    184 
    187  template <int Interleave>
    189 
    191  static int const kStorageRank = 3;
    192 
    194  static int const kInterleave = Interleave;
    195 
    199  return make_Coord(
    200  coord.row() / kInterleave,
    201  coord.column(),
    202  coord.row() % kInterleave
    203  );
    204  }
    205 
    208  static Coord<kStorageRank> stride(int ldm) {
    209  return make_Coord(
    210  ldm * kInterleave,
    211  kInterleave,
    212  1
    213  );
    214  }
    215  };
    216 
    219  template <int Interleave>
    221 
    223  static int const kStorageRank = 3;
    224 
    226  static int const kInterleave = Interleave;
    227 
    231  return make_Coord(
    232  coord.column() / kInterleave,
    233  coord.row(),
    234  coord.column() % kInterleave
    235  );
    236  }
    237 
    240  static Coord<kStorageRank> stride(int ldm) {
    241  return make_Coord(
    242  ldm * kInterleave,
    243  kInterleave,
    244  1
    245  );
    246  }
    247  };
    248 
    253  static int const kStorageRank = 3;
    254 
    256  static int const kRow = 0;
    257 
    259  static int const kColumn = 1;
    260 
    265  return make_Coord(coord.row(), coord.column(), 0);
    266  }
    267 
    271  if (layout == MatrixLayout::kRowMajor) {
    272  return make_Coord(ldm, 1, 1);
    273  }
    274  return make_Coord(1, ldm, 1);
    275  }
    276  };
    277 
    280  template <int BlockRows, int BlockColumns>
    282 
    284  static int const kStorageRank = 4;
    285 
    287  static int const kBlockRows = BlockRows;
    288 
    290  static int const kBlockColumns = BlockColumns;
    291 
    295  return make_Coord(
    296  coord.column() / kBlockColumns,
    297  coord.row() / kBlockRows,
    298  coord.column() % kBlockColumns,
    299  coord.row() % kBlockRows
    300  );
    301  }
    302 
    305  static Coord<kStorageRank> stride(int ldm) {
    306  return make_Coord(
    307  ldm * kBlockRows * kBlockColumns,
    309  kBlockRows,
    310  1
    311  );
    312  }
    313  };
    314 
    317  template <int BlockRows, int BlockColumns>
    319 
    321  static int const kStorageRank = 4;
    322 
    324  static int const kBlockRows = BlockRows;
    325 
    327  static int const kBlockColumns = BlockColumns;
    328 
    332  return make_Coord(
    333  coord.row() / kBlockRows,
    334  coord.column() / kBlockColumns,
    335  coord.row() % kBlockRows,
    336  coord.column() % kBlockColumns
    337  );
    338  }
    339 
    342  static Coord<kStorageRank> stride(int ldm) {
    343  return make_Coord(
    344  ldm * kBlockRows * kBlockColumns,
    347  1
    348  );
    349  }
    350  };
    351 };
    352 
    354 
    356 struct GemmOperand {
    357  enum Kind { kA, kB, kC, kD };
    358 };
    359 
    361 
    364  enum Kind {
    367  };
    368 };
    369 
    371 
    372 } // namespace cutlass
    int Index
    Integer-valued index.
    Definition: matrix_traits.h:41
    +
    Mapping function for column-major matrices.
    Definition: matrix_traits.h:176
    +
    static int const kBlockColumns
    Interleaving size in columns dimension.
    Definition: matrix_traits.h:327
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE Coord< kStorageRank > operator()(MatrixCoord const &coord) const
    Maps (row, col) to (col, row, col)
    Definition: matrix_traits.h:230
    +
    CUTLASS_HOST_DEVICE Coord< kStorageRank > operator()(MatrixCoord const &coord) const
    Maps (i, j) to (i, j)
    Definition: matrix_traits.h:170
    +
    Transformation applied to matrix operands.
    Definition: matrix_traits.h:363
    +
    Definition: matrix_traits.h:188
    +
    static int const kBlockColumns
    Interleaving size in columns dimension.
    Definition: matrix_traits.h:290
    +
    Definition: matrix_traits.h:365
    +
    Definition: matrix_traits.h:281
    +
    Definition: matrix_traits.h:220
    +
    A Coord is a coordinate of arbitrary rank into a tensor or matrix.
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 2-element coordinate.
    Definition: coord.h:318
    +
    no operation
    Definition: matrix_traits.h:366
    +
    CUTLASS_HOST_DEVICE MatrixCoord & operator/=(Base const &b)
    In-place division.
    Definition: matrix_traits.h:135
    +
    static int const kStorageRank
    Definition: matrix_traits.h:167
    +
    Definition: matrix_traits.h:251
    +
    CUTLASS_HOST_DEVICE Coord< kStorageRank > operator()(MatrixCoord const &coord) const
    Maps (i, j) to (j, i)
    Definition: matrix_traits.h:180
    +
    Kind
    Definition: matrix_traits.h:364
    +
    Coord< 2, Index > Base
    Base type is a Coord of rank=2.
    Definition: matrix_traits.h:44
    +
    CUTLASS_HOST_DEVICE MatrixCoord operator+(Base const &b) const
    Element-wise addition.
    Definition: matrix_traits.h:90
    +
    CUTLASS_HOST_DEVICE Coord & operator*=(Coord const &b)
    In-place multiplication.
    Definition: coord.h:197
    +
    Definition: matrix_traits.h:357
    +
    static int const kRow
    Dimension of rows.
    Definition: matrix_traits.h:256
    +
    static int const kStorageRank
    Definition: matrix_traits.h:177
    +
    static int const kBlockRows
    Interleaving size in rows dimension.
    Definition: matrix_traits.h:287
    +
    Defines data layouts of various matrix formats usable by TensorRef and other classes.
    Definition: matrix_traits.h:156
    +
    static int const kInterleave
    Interleaving size.
    Definition: matrix_traits.h:194
    +
    Definition: matrix_traits.h:159
    +
    CUTLASS_HOST_DEVICE Index const & column() const
    Returns the column of the coordinate.
    Definition: matrix_traits.h:78
    +
    CUTLASS_HOST_DEVICE MatrixCoord(Index row, Index column)
    Helper to construct from a row and column.
    Definition: matrix_traits.h:66
    +
    static CUTLASS_HOST_DEVICE Coord< kStorageRank > stride(int ldm)
    Helper to compute stride vector from leading dimension.
    Definition: matrix_traits.h:208
    +
    static int const kColumn
    Dimension of columns.
    Definition: matrix_traits.h:259
    +
    static int const kStorageRank
    Rank of storage n-D array.
    Definition: matrix_traits.h:191
    +
    CUTLASS_HOST_DEVICE Coord & operator-=(Coord const &b)
    In-place subtraction.
    Definition: coord.h:188
    +
    static int const kStorageRank
    Arbitrary storage rank.
    Definition: matrix_traits.h:253
    +
    Definition: matrix_traits.h:357
    +
    CUTLASS_HOST_DEVICE Coord & operator+=(Coord const &b)
    In-place addition.
    Definition: coord.h:179
    +
    CUTLASS_HOST_DEVICE Coord< kStorageRank > operator()(MatrixCoord const &coord) const
    Maps (row, col) to (row, col, row)
    Definition: matrix_traits.h:198
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    static int const kBlockRows
    Interleaving size in rows dimension.
    Definition: matrix_traits.h:324
    +
    CUTLASS_HOST_DEVICE Index const & row() const
    Returns the row of the coordinate.
    Definition: matrix_traits.h:70
    +
    CUTLASS_HOST_DEVICE Index & at()
    Gets the index of a given Coord element.
    Definition: coord.h:240
    +
    CUTLASS_HOST_DEVICE Coord & operator/=(Coord const &b)
    In-place division.
    Definition: coord.h:206
    +
    CUTLASS_HOST_DEVICE MatrixCoord operator-(Base const &b) const
    Element-wise subtraction.
    Definition: matrix_traits.h:96
    +
    CUTLASS_HOST_DEVICE MatrixCoord(Coord< 2, Index > const &coord)
    Constructs from Coord<2>
    Definition: matrix_traits.h:62
    +
    static int const kStorageRank
    Rank of storage n-D array.
    Definition: matrix_traits.h:321
    +
    Statically-sized array specifying Coords within a tensor.
    Definition: coord.h:49
    +
    Gemm operand - D = A * B + C.
    Definition: matrix_traits.h:356
    +
    static CUTLASS_HOST_DEVICE Coord< kStorageRank > stride(int ldm)
    Helper to compute stride vector from leading dimension.
    Definition: matrix_traits.h:342
    +
    static int const kRow
    Rows dimension.
    Definition: matrix_traits.h:47
    +
    CUTLASS_HOST_DEVICE MatrixCoord & operator-=(Base const &b)
    In-place subtraction.
    Definition: matrix_traits.h:121
    +
    CUTLASS_HOST_DEVICE MatrixCoord operator*(Base const &b) const
    Element-wise multiplication.
    Definition: matrix_traits.h:102
    +
    Definition: matrix_traits.h:159
    +
    static CUTLASS_HOST_DEVICE Coord< kStorageRank > stride(int ldm)
    Helper to compute stride vector from leading dimension.
    Definition: matrix_traits.h:240
    +
    CUTLASS_HOST_DEVICE Coord< kStorageRank > operator()(MatrixCoord const &coord) const
    Definition: matrix_traits.h:264
    +
    Kind
    Enumeration defining fundamental contiguous layouts.
    Definition: matrix_traits.h:159
    +
    CUTLASS_HOST_DEVICE Index & row()
    Returns the row of the coordinate.
    Definition: matrix_traits.h:74
    +
    static int const kStorageRank
    Rank of storage n-D array.
    Definition: matrix_traits.h:284
    +
    static int const kInterleave
    Interleaving size.
    Definition: matrix_traits.h:226
    +
    CUTLASS_HOST_DEVICE Coord< kStorageRank > operator()(MatrixCoord const &coord) const
    Maps (row, col) to (row, col, row, col)
    Definition: matrix_traits.h:331
    +
    Kind
    Definition: matrix_traits.h:357
    +
    Definition: matrix_traits.h:357
    +
    CUTLASS_HOST_DEVICE Index & column()
    Returns the column of the coordinate.
    Definition: matrix_traits.h:82
    +
    CUTLASS_HOST_DEVICE MatrixCoord & operator*=(Base const &b)
    In-place multiplication.
    Definition: matrix_traits.h:128
    +
    static int const kStorageRank
    Rank of storage n-D array.
    Definition: matrix_traits.h:223
    +
    Definition: matrix_traits.h:318
    +
    CUTLASS_HOST_DEVICE MatrixCoord & operator+=(Base const &b)
    In-place addition.
    Definition: matrix_traits.h:114
    +
    static CUTLASS_HOST_DEVICE Coord< kStorageRank > stride(int ldm)
    Helper to compute stride vector from leading dimension.
    Definition: matrix_traits.h:305
    +
    CUTLASS_HOST_DEVICE Coord< kStorageRank > operator()(MatrixCoord const &coord) const
    Maps (row, col) to (col, row, col, row)
    Definition: matrix_traits.h:294
    +
    static CUTLASS_HOST_DEVICE Coord< kStorageRank > stride(MatrixLayout::Kind layout, int ldm)
    Helper to construct a stride vector based on contiguous matrix layout and leading dimension...
    Definition: matrix_traits.h:270
    +
    Definition: matrix_traits.h:38
    +
    CUTLASS_HOST_DEVICE MatrixCoord operator/(Base const &b) const
    Element-wise division.
    Definition: matrix_traits.h:108
    +
    static int const kColumn
    Columns dimension.
    Definition: matrix_traits.h:50
    +
    CUTLASS_HOST_DEVICE MatrixCoord()
    Default ctor.
    Definition: matrix_traits.h:58
    +
    Definition: matrix_traits.h:357
    +
    Mapping function for row-major matrices.
    Definition: matrix_traits.h:166
    diff --git a/docs/menudata.js b/docs/menudata.js index 725988aa8..dde1bbfea 100644 --- a/docs/menudata.js +++ b/docs/menudata.js @@ -29,24 +29,33 @@ var menudata={children:[ {text:"Namespace Members",url:"namespacemembers.html",children:[ {text:"All",url:"namespacemembers.html",children:[ {text:"_",url:"namespacemembers.html#index__"}, +{text:"a",url:"namespacemembers.html#index_a"}, {text:"c",url:"namespacemembers.html#index_c"}, +{text:"e",url:"namespacemembers.html#index_e"}, {text:"f",url:"namespacemembers.html#index_f"}, {text:"g",url:"namespacemembers.html#index_g"}, {text:"i",url:"namespacemembers.html#index_i"}, {text:"l",url:"namespacemembers.html#index_l"}, {text:"m",url:"namespacemembers.html#index_m"}, +{text:"n",url:"namespacemembers.html#index_n"}, {text:"o",url:"namespacemembers.html#index_o"}, +{text:"p",url:"namespacemembers.html#index_p"}, {text:"r",url:"namespacemembers.html#index_r"}, {text:"s",url:"namespacemembers.html#index_s"}, {text:"t",url:"namespacemembers.html#index_t"}]}, {text:"Functions",url:"namespacemembers_func.html",children:[ {text:"_",url:"namespacemembers_func.html#index__"}, +{text:"a",url:"namespacemembers_func.html#index_a"}, {text:"c",url:"namespacemembers_func.html#index_c"}, +{text:"e",url:"namespacemembers_func.html#index_e"}, +{text:"f",url:"namespacemembers_func.html#index_f"}, {text:"g",url:"namespacemembers_func.html#index_g"}, {text:"i",url:"namespacemembers_func.html#index_i"}, {text:"l",url:"namespacemembers_func.html#index_l"}, {text:"m",url:"namespacemembers_func.html#index_m"}, +{text:"n",url:"namespacemembers_func.html#index_n"}, {text:"o",url:"namespacemembers_func.html#index_o"}, +{text:"p",url:"namespacemembers_func.html#index_p"}, {text:"r",url:"namespacemembers_func.html#index_r"}, {text:"s",url:"namespacemembers_func.html#index_s"}]}, {text:"Typedefs",url:"namespacemembers_type.html"}]}]}, @@ -78,6 +87,7 @@ var menudata={children:[ {text:"v",url:"functions_v.html#index_v"}, 
{text:"w",url:"functions_w.html#index_w"}, {text:"y",url:"functions_y.html#index_y"}, +{text:"z",url:"functions_z.html#index_z"}, {text:"~",url:"functions_0x7e.html#index_0x7e"}]}, {text:"Functions",url:"functions_func.html",children:[ {text:"a",url:"functions_func.html#index_a"}, @@ -89,8 +99,10 @@ var menudata={children:[ {text:"g",url:"functions_func_g.html#index_g"}, {text:"h",url:"functions_func_h.html#index_h"}, {text:"i",url:"functions_func_i.html#index_i"}, +{text:"k",url:"functions_func_k.html#index_k"}, {text:"l",url:"functions_func_l.html#index_l"}, {text:"m",url:"functions_func_m.html#index_m"}, +{text:"n",url:"functions_func_n.html#index_n"}, {text:"o",url:"functions_func_o.html#index_o"}, {text:"p",url:"functions_func_p.html#index_p"}, {text:"r",url:"functions_func_r.html#index_r"}, @@ -99,6 +111,7 @@ var menudata={children:[ {text:"u",url:"functions_func_u.html#index_u"}, {text:"v",url:"functions_func_v.html#index_v"}, {text:"w",url:"functions_func_w.html#index_w"}, +{text:"z",url:"functions_func_z.html#index_z"}, {text:"~",url:"functions_func_0x7e.html#index_0x7e"}]}, {text:"Variables",url:"functions_vars.html",children:[ {text:"a",url:"functions_vars.html#index_a"}, @@ -113,6 +126,7 @@ var menudata={children:[ {text:"l",url:"functions_vars_l.html#index_l"}, {text:"m",url:"functions_vars_m.html#index_m"}, {text:"n",url:"functions_vars_n.html#index_n"}, +{text:"o",url:"functions_vars_o.html#index_o"}, {text:"p",url:"functions_vars_p.html#index_p"}, {text:"r",url:"functions_vars_r.html#index_r"}, {text:"s",url:"functions_vars_s.html#index_s"}, @@ -127,6 +141,7 @@ var menudata={children:[ {text:"f",url:"functions_type_f.html#index_f"}, {text:"g",url:"functions_type_g.html#index_g"}, {text:"i",url:"functions_type_i.html#index_i"}, +{text:"k",url:"functions_type_k.html#index_k"}, {text:"l",url:"functions_type_l.html#index_l"}, {text:"m",url:"functions_type_m.html#index_m"}, {text:"n",url:"functions_type_n.html#index_n"}, @@ -140,8 +155,10 @@ var 
menudata={children:[ {text:"Enumerations",url:"functions_enum.html"}, {text:"Enumerator",url:"functions_eval.html",children:[ {text:"a",url:"functions_eval.html#index_a"}, +{text:"b",url:"functions_eval.html#index_b"}, {text:"k",url:"functions_eval.html#index_k"}, {text:"m",url:"functions_eval.html#index_m"}, +{text:"o",url:"functions_eval.html#index_o"}, {text:"v",url:"functions_eval.html#index_v"}]}]}]}, {text:"Files",url:"files.html",children:[ {text:"File List",url:"files.html"}, diff --git a/docs/modules.html b/docs/modules.html index c42247bd4..8fc908440 100644 --- a/docs/modules.html +++ b/docs/modules.html @@ -76,19 +76,20 @@ $(function() {

    Namespaces

    - - - - - - - + + + + + + + +
     Fragment Concept
     Fragment Iterator Concept
     Predicate Vector Concept
     Predicate Iterator Concept
     Predicate Tile Adapter Concept
     Layout Concept
     Tile Traits Concept
     Tile Load Iterator Concept
     Tile Store Iterator Concept
     Identity Block Swizzle
     Predicate Vector Concept
     Predicate Iterator Concept
     Predicate Tile Adapter Concept
     Layout Concept
     Tile Traits Concept
     Tile Load Iterator Concept
     Tile Store Iterator Concept
    diff --git a/docs/namespacecutlass.html b/docs/namespacecutlass.html index 989135cba..4fb1ce9a2 100644 --- a/docs/namespacecutlass.html +++ b/docs/namespacecutlass.html @@ -79,6 +79,8 @@ $(function() { + + @@ -88,24 +90,14 @@ Namespaces Classes + + - - - - - - - - - - - - @@ -129,6 +121,8 @@ Classes + + @@ -143,44 +137,50 @@ Classes + + + - - - - - - - - - - - - + + + + - - + + - + - + + - + - + + + + + + + + + + + @@ -190,22 +190,43 @@ Classes + + - + + + + + + + + + + + + + + + + + + + + @@ -216,6 +237,8 @@ Classes + + @@ -240,19 +263,34 @@ Classes - + - + - + - + + + + + - + + + + + + + - + + + + + + @@ -263,9 +301,15 @@ Classes + + + + + + @@ -282,13 +326,30 @@ Classes + + + + + + + + + + + + + - + + + + + @@ -299,6 +360,20 @@ Classes + + + + + + + + + + + + + +

    Namespaces

     detail
     
     gemm
     
     platform
    struct  AlignedStruct
     
    struct  bin1_t
     
    struct  ComputeOffsetFromShape
     Compute the offset for the given coordinates in a cube. More...
     
    struct  ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  ComputeOffsetFromStrides
     Compute the offset for the given coordinates in a cube. More...
     
    struct  ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  ComputeThreadOffsetFromStrides
     Decompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. More...
     
     
    struct  divide_assert
     
    struct  DumpType
     
    struct  Extent
     Returns the extent of a scalar or vector. More...
     
     
    struct  FragmentConstIterator
     
    struct  FragmentElementType
     Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
     
    struct  FragmentIterator
     A template defining Fragment Iterator Concept. More...
     
    struct  FragmentLoad
     
    struct  FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentStore
     
    struct  FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  GemmOperand
     Gemm operand - D = A * B + C. More...
     
    struct  Identity
     Describes identity elements. More...
     
    struct  IdentityTensorMapFunc
     
    struct  int4_t
     
    struct  is_pow2
     
    struct  IteratorAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    struct  IteratorFragment
     Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
    struct  KernelLaunchConfiguration
     Structure containing the basic launch configuration of a CUDA kernel. More...
     
    struct  Load
     
    struct  Load< double, 2, Memory_, true, 16 >
    struct  Load< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 16 >
    struct  Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 >
     Partial specialization for 16b loads. More...
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 4 >
    struct  Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 8 >
    struct  Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 >
     
    struct  Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 >
     
    struct  Load< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  Load< Vector< bin1_t, 32 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  Load< Vector< int4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  Load< Vector< uint4_t, 8 >, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    struct  log2_down
     
     
    struct  log2_up< N, 1, Count >
     
    struct  MatrixCoord
     
    struct  MatrixLayout
     Describes layouts of matrices. More...
     Defines data layouts of various matrix formats usable by TensorRef and other classes. More...
     
    struct  MatrixTransform
     Transformation applied to matrix operands. More...
     
    struct  Max
     
    struct  MemorySpace
     Enum to specify which memory space data resides in. More...
     
    struct  Min
     
    struct  PredicatedTileLoadStream
     Generic stream for loading and transforming fragments. More...
     
    struct  PredicatedTileStoreStream
     Generic stream for transforming and storing fragments. More...
     
    struct  PredicateTileAdapter
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    struct  PredicateVector
     Statically sized array of bits implementing. More...
     
    struct  RegularTilePredicateFunctor
     Functor computing a predicate given the logical position of an access. More...
     
    struct  ReshapeTile
     
    struct  ReshapeTile< Tile_, kAccessSize_, true >
     
    struct  ScalarIO
     Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...
     
    struct  Shape
     A Shape implementing Layout Concept describing the dimensions of a cube. More...
     
     
    struct  ShapeDiv
     
    struct  ShapeDivCeiling
     
    struct  ShapeMax
     
    struct  ShapeMin
     
    struct  Store
     
    struct  Store< double, 2, Memory_, true, 16 >
    struct  Store< double, 2, Memory_, FragmentElementType::kScalar, double, kStride, 16 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 16 >
    struct  Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, 1, 2 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 4 >
    struct  Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 16 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 8 >
    struct  Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 4 >
     
    struct  Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kScalar, Scalar_, kStride, 8 >
     
    struct  Store< Scalar_, kAccessSize, Memory_, FragmentElementType::kWmmaMatrix, FragmentElement_, kStride, size >
     
    class  TensorRef
     Structure modeling a pointer and stride into a tensor. More...
     
    class  TensorRef< Storage_, Rank_, MapFunc_, 1, Index_, LongIndex_ >
     Specialization for rank=1 case with no internal StrideVector. More...
     
    struct  TensorRefArray
     
    struct  TensorRefBatchStrided
     
    class  TensorView
     Host-side reference implementation of tensor operations. More...
     Defines a view into a logical tensor. More...
     
    struct  TileAllocation
     Class for storing a tile in memory and accessing it through a tensor ref. More...
     
    struct  TileCoord
     
    struct  TiledThreadOffset
     Basic thread offset function computed from a thread shape. More...
    struct  TileLoadIterator
     An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
     
    struct  TileLoadStream
     Generic stream for loading and transforming fragments. More...
     
    struct  TileStoreIterator
     An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
     
    struct  TileStoreStream
     Generic stream for transforming and storing fragments. More...
     
    struct  TileTraits
     A template defining Tile Traits Concept. More...
     
    struct  TrivialPredicateTileAdapter
     Always returns true predicate. More...
     
    struct  uint4_t
     
    union  Vector
     
    union  Vector< bin1_t, kLanes_ >
     Vector definition for 1-bit binary datatype. More...
     
    union  Vector< half, 1 >
     
    union  Vector< half, kLanes_ >
     
    union  Vector< int4_t, kLanes_ >
     Vector definition for 4-bit signed integer datatype. More...
     
    union  Vector< uint4_t, kLanes_ >
     Vector definition for 4-bit unsigned integer datatype. More...
     
    struct  Vectorize
     
    struct  Vectorize< Element_, 1 >
    struct  Vectorize< Vector< bin1_t, 32 >, kLanes_ >
     
    struct  Vectorize< Vector< int4_t, 8 >, kLanes_ >
     
    struct  Vectorize< Vector< uint4_t, 8 >, kLanes_ >
     
    struct  VectorTraits
     Traits describing properties of vectors and scalar-as-vectors. More...
    struct  VectorTraits< Vector< T, Lanes > const >
     Partial specialization for actual cutlass::Vector. More...
     
    struct  ZipConvert
     Zips two convert operations. More...
     
    struct  ZipFragment
     A template defining Fragment Concept. More...
     
    struct  ZipTensorRef
     
    struct  ZipTileAllocation
     Manages a pair of tile allocations as if they are one allocation. More...
     
    class  ZipTileIterator
     Constructs an iterator from a pair of iterators. More...
     
    @@ -314,85 +389,42 @@ Functions - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -402,6 +434,12 @@ Functions + + + + + + @@ -426,12 +464,24 @@ Functions - - - - - - + + + + + + + + + + + + + + + + + +

    Functions

    CUTLASS_HOST_DEVICE Coord< 4 > make_Coord (int _0, int _1, int _2, int _3)
     Helper to make a 4-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw (Coord< 3 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw (Coord< 4 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_hwc (Coord< 4 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_dhw (Coord< 4 > const &coord)
     Getter. More...
     
    template<typename Shape_ >
    CUTLASS_HOST_DEVICE Coord< 3 > make_Coord_from_shape ()
     
    template<int Rank>
    std::ostream & operator<< (std::ostream &out, Coord< Rank > const &coord)
     
    template<typename T >
    std::ostream & operator<< (std::ostream &out, ScalarIO< T > const &scalar)
     Default printing to ostream. More...
     
    template<>
    std::ostream & operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar)
     Printing to ostream of int8_t as integer rather than character. More...
     
    template<>
    std::ostream & operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar)
     Printing to ostream of uint8_t as integer rather than character. More...
     
    template<>
    std::ostream & operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::bin1_t, 32 > > const &scalar)
     Printing to ostream of vector of 1b elements. More...
     
    template<>
    std::ostream & operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::int4_t, 8 > > const &scalar)
     Printing to ostream of vector of 4b signed integer elements. More...
     
    template<>
    std::ostream & operator<< (std::ostream &out, ScalarIO< cutlass::Vector< cutlass::uint4_t, 8 > > const &scalar)
     Printing to ostream of vector of 4b unsigned integer elements. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Loads a fragment from an input iterator, masked by a predicate iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator &iterator, Fragment &fragment)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_store (OutputIterator &iterator, Fragment const &fragment)
     Stores a fragment to a shared memory output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    template<typename dividend_t , typename divisor_t >
    CUTLASS_HOST_DEVICE dividend_t round_nearest (dividend_t dividend, divisor_t divisor)
     
    template<typename value_t >
    CUTLASS_HOST_DEVICE value_t lcm (value_t a, value_t b)
     
    template<typename value_t >
    CUTLASS_HOST_DEVICE value_t clz (value_t x)
     
    template<typename value_t >
    CUTLASS_HOST_DEVICE value_t find_log2 (value_t x)
     
    __host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl (cudaError_t error, const char *filename, int line)
     The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
     
    template<>
    struct __align__ (64) AlignedStruct< 64 >
     
    template<typename Scalar_ >
    CUTLASS_DEVICE void make_zero (Scalar_ &x)
     
    template<typename Scalar_ , int kLanes_>
    CUTLASS_DEVICE void make_zero (Vector< Scalar_, kLanes_ > &vec)
     
    template<typename Scalar_ >
    CUTLASS_HOST_DEVICE void make_zero (Scalar_ &x)
     
    template<typename Scalar_ , int kLanes_>
    CUTLASS_HOST_DEVICE void make_zero (Vector< Scalar_, kLanes_ > &vec)
     
    template<typename First , typename Second >
    CUTLASS_HOST_DEVICE ZipFragment< First, Second > make_ZipFragment (First const &first, Second const &second)
     Helper to construct a ZipFragment object. More...
     
    template<typename First , typename Second >
    CUTLASS_HOST_DEVICE ZipConvert< First, Second > make_ZipConvert (First const &first, Second const &second)
     Helper to construct a ZipConvert object. More...
     
    template<typename First , typename Second >
    CUTLASS_HOST_DEVICE ZipTensorRef< First, Second > make_ZipTensorRef (First const &first, Second const &second)
     Constructs a ZipTensorRef. More...
     

    Function Documentation

    @@ -452,10 +502,30 @@ template<>
    +
    + + +

    ◆ __align__() [2/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    +
    -

    ◆ __align__() [2/7]

    +

    ◆ __align__() [3/7]

    @@ -475,7 +545,7 @@ template<>
    -

    ◆ __align__() [3/7]

    +

    ◆ __align__() [4/7]

    @@ -495,7 +565,7 @@ template<>
    -

    ◆ __align__() [4/7]

    +

    ◆ __align__() [5/7]

    @@ -515,7 +585,7 @@ template<>
    -

    ◆ __align__() [5/7]

    +

    ◆ __align__() [6/7]

    @@ -535,7 +605,7 @@ template<>
    -

    ◆ __align__() [6/7]

    +

    ◆ __align__() [7/7]

    @@ -554,23 +624,24 @@ template<>
    - -

    ◆ __align__() [7/7]

    + +

    ◆ clz()

    -template<>
    +template<typename value_t >
    - + - - + +
    struct cutlass::__align__ CUTLASS_HOST_DEVICE value_t cutlass::clz ()value_t x)
    +

    log2 computation, what's the difference between the below codes and log2_up/down codes?

    @@ -607,6 +678,26 @@ template<>
    Returns
    The CUDA error.
    +
    + + +

    ◆ find_log2()

    + +
    +
    +
    +template<typename value_t >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE value_t cutlass::find_log2 (value_t x)
    +
    +
    @@ -638,82 +729,10 @@ template<typename value_t >

    Greatest common divisor

    -
    - - -

    ◆ get_Coord_dhw()

    - -
    -
    - - - - - - - - -
    CUTLASS_HOST_DEVICE Coord<3> cutlass::get_Coord_dhw (Coord< 4 > const & coord)
    -
    - -
    -
    - -

    ◆ get_Coord_hw() [1/2]

    - -
    -
    - - - - - - - - -
    CUTLASS_HOST_DEVICE Coord<2> cutlass::get_Coord_hw (Coord< 3 > const & coord)
    -
    - -
    -
    - -

    ◆ get_Coord_hw() [2/2]

    - -
    -
    - - - - - - - - -
    CUTLASS_HOST_DEVICE Coord<2> cutlass::get_Coord_hw (Coord< 4 > const & coord)
    -
    - -
    -
    - -

    ◆ get_Coord_hwc()

    - -
    -
    - - - - - - - - -
    CUTLASS_HOST_DEVICE Coord<3> cutlass::get_Coord_hwc (Coord< 4 > const & coord)
    -
    -
    -

    ◆ iterator_load() [1/4]

    +

    ◆ iterator_load()

    @@ -740,238 +759,10 @@ template<typename InputIterator , typename Fragment >
    -
    - - -

    ◆ iterator_load() [2/4]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & _iterator,
    Fragmentfragment,
    typename InputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    -
    - -
    -
    - -

    ◆ iterator_load() [3/4]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & iterator,
    Fragmentfragment,
    typename InputIterator::Index offset = 0 
    )
    -
    - -
    -
    - -

    ◆ iterator_load() [4/4]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & iterator,
    Fragmentfragment,
    ConstPredicateAdapter pred_it 
    )
    -
    - -
    -
    - -

    ◆ iterator_load_post_increment() [1/3]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragmentfragment,
    typename InputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    -
    - -
    -
    - -

    ◆ iterator_load_post_increment() [2/3]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragmentfragment,
    typename InputIterator::Index offset = 0 
    )
    -
    - -
    -
    - -

    ◆ iterator_load_post_increment() [3/3]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragmentfragment,
    ConstPredicateAdapter pred_it 
    )
    -
    -
    -

    ◆ iterator_store() [1/4]

    +

    ◆ iterator_store()

    @@ -998,234 +789,6 @@ template<typename OutputIterator , typename Fragment >
    -
    - - -

    ◆ iterator_store() [2/4]

    - -
    -
    -
    -template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & _iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    -
    - -
    -
    - -

    ◆ iterator_store() [3/4]

    - -
    -
    -
    -template<typename OutputIterator , typename Fragment >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset = 0 
    )
    -
    - -
    -
    - -

    ◆ iterator_store() [4/4]

    - -
    -
    -
    -template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & iterator,
    Fragment const & fragment,
    ConstPredicateAdapter pred_it 
    )
    -
    - -
    -
    - -

    ◆ iterator_store_post_increment() [1/3]

    - -
    -
    -
    -template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    -
    - -
    -
    - -

    ◆ iterator_store_post_increment() [2/3]

    - -
    -
    -
    -template<typename OutputIterator , typename Fragment >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset = 0 
    )
    -
    - -
    -
    - -

    ◆ iterator_store_post_increment() [3/3]

    - -
    -
    -
    -template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    ConstPredicateAdapter pred_it 
    )
    -
    -
    @@ -1379,8 +942,27 @@ template<typename value_t > - -

    ◆ make_zero() [1/2]

    + +

    ◆ make_Coord_from_shape()

    + +
    +
    +
    +template<typename Shape_ >
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<3> cutlass::make_Coord_from_shape ()
    +
    + +
    +
    + +

    ◆ make_zero() [1/2]

    @@ -1388,7 +970,7 @@ template<typename value_t >
    template<typename Scalar_ >
    - + @@ -1399,8 +981,8 @@ template<typename Scalar_ > - -

    ◆ make_zero() [2/2]

    + +

    ◆ make_zero() [2/2]

    @@ -1408,7 +990,7 @@ template<typename Scalar_ >
    template<typename Scalar_ , int kLanes_>
    CUTLASS_DEVICE void cutlass::make_zero CUTLASS_HOST_DEVICE void cutlass::make_zero ( Scalar_ &  x)
    - + @@ -1417,6 +999,354 @@ template<typename Scalar_ , int kLanes_>
    CUTLASS_DEVICE void cutlass::make_zero CUTLASS_HOST_DEVICE void cutlass::make_zero ( Vector< Scalar_, kLanes_ > &  vec)
    +
    + + +

    ◆ make_ZipConvert()

    + +
    +
    +
    +template<typename First , typename Second >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE ZipConvert<First, Second> cutlass::make_ZipConvert (First const & first,
    Second const & second 
    )
    +
    + +
    +
    + +

    ◆ make_ZipFragment()

    + +
    +
    +
    +template<typename First , typename Second >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE ZipFragment<First, Second> cutlass::make_ZipFragment (First const & first,
    Second const & second 
    )
    +
    + +
    +
    + +

    ◆ make_ZipTensorRef()

    + +
    +
    +
    +template<typename First , typename Second >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE ZipTensorRef<First, Second> cutlass::make_ZipTensorRef (First const & first,
    Second const & second 
    )
    +
    + +
    +
    + +

    ◆ operator<<() [1/7]

    + +
    +
    +
    +template<int Rank>
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::operator<< (std::ostream & out,
    Coord< Rank > const & coord 
    )
    +
    + +
    +
    + +

    ◆ operator<<() [2/7]

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::operator<< (std::ostream & out,
    ScalarIO< T > const & scalar 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator<<() [3/7]

    + +
    +
    +
    +template<>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::operator<< (std::ostream & out,
    ScalarIO< int8_t > const & scalar 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator<<() [4/7]

    + +
    +
    +
    +template<>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::operator<< (std::ostream & out,
    ScalarIO< uint8_t > const & scalar 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator<<() [5/7]

    + +
    +
    +
    +template<>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::operator<< (std::ostream & out,
    ScalarIO< cutlass::Vector< cutlass::bin1_t, 32 > > const & scalar 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator<<() [6/7]

    + +
    +
    +
    +template<>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::operator<< (std::ostream & out,
    ScalarIO< cutlass::Vector< cutlass::int4_t, 8 > > const & scalar 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator<<() [7/7]

    + +
    +
    +
    +template<>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::operator<< (std::ostream & out,
    ScalarIO< cutlass::Vector< cutlass::uint4_t, 8 > > const & scalar 
    )
    +
    +inline
    +
    +
    @@ -1448,108 +1378,12 @@ template<typename dividend_t , typename divisor_t >

    Round dividend up to the nearest multiple of divisor

    -
    - - -

    ◆ shared_iterator_load() [1/2]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment >
    - - - - - - - - - - - - - - - - - - -
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator & iterator,
    Fragmentfragment 
    )
    -
    - -
    -
    - -

    ◆ shared_iterator_load() [2/2]

    - -
    -
    -
    -template<typename InputIterator , typename Fragment >
    - - - - - - - - - - - - - - - - - - - - - - - - -
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator & iterator,
    Fragmentfragment,
    int d 
    )
    -
    - -
    -
    - -

    ◆ shared_iterator_store()

    - -
    -
    -
    -template<typename OutputIterator , typename Fragment >
    - - - - - - - - - - - - - - - - - - -
    CUTLASS_DEVICE void cutlass::shared_iterator_store (OutputIterator & iterator,
    Fragment const & fragment 
    )
    -
    -
    diff --git a/docs/namespacecutlass_1_1detail.html b/docs/namespacecutlass_1_1detail.html new file mode 100644 index 000000000..154ce5c45 --- /dev/null +++ b/docs/namespacecutlass_1_1detail.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::detail Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::detail Namespace Reference
    +
    +
    + + + + +

    +Classes

    class  ScalarOrPointer
     
    +
    + + + + diff --git a/docs/namespacecutlass_1_1gemm.html b/docs/namespacecutlass_1_1gemm.html index 1c84e4480..1545f43f4 100644 --- a/docs/namespacecutlass_1_1gemm.html +++ b/docs/namespacecutlass_1_1gemm.html @@ -84,19 +84,28 @@ $(function() { Classes struct  ClearAccumulators   +struct  ColumnMajorBlockSwizzle +  struct  DgemmConfig   struct  DgemmTraits   +struct  Fp16SgemmConfig +  +struct  Fp16SgemmSgemmTraits +  struct  FragmentMultiplyAdd   -struct  FragmentMultiplyAdd< half > +struct  FragmentMultiplyAdd< half, half, true >   struct  Gemm   struct  GemmConfig   +struct  GemmCoord +  struct  GemmDesc + GEMM problem description. More...
      struct  GemmEpilogue   @@ -151,7 +160,8 @@ Classes   struct  GlobalLoadStream   -struct  GlobalLoadStreamBase +struct  GlobalLoadStreamPair + Collect the global load streams for multiplicands. More...
      struct  HgemmConfig   @@ -187,9 +197,7 @@ Classes   struct  IgemmConfig   -struct  IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > -  -struct  IgemmContiguousGlobalTileTraits +struct  IgemmConfig< OutputTile_, int8_t, ThreadGemmShape_ >   struct  IgemmEpilogue   @@ -205,6 +213,8 @@ Classes   struct  IgemmFloatToInt8Converter   +struct  IgemmGlobalIteratorAb +  struct  IgemmGlobalLoadTransformer   struct  IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > @@ -213,6 +223,8 @@ Classes   struct  IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >   +struct  IgemmGlobalTileTraits +  struct  IgemmInt8ToFloatConverter   struct  IgemmSharedStoreTransformer @@ -221,11 +233,15 @@ Classes   struct  IgemmTileTraitsHelperA   -struct  IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > +struct  IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_, Index_ > +  +struct  IgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_, Index_ >   struct  IgemmTileTraitsHelperB   -struct  IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > +struct  IgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_, Index_ > +  +struct  IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_, Index_ >   struct  IgemmTraits   @@ -243,9 +259,17 @@ Classes   struct  IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >   +struct  Launch + Partial specialization for launching the GEMM kernel with or without launch bounds. More...
    +  +struct  Launch< Gemm, false > + Partial specialization for launching the GEMM kernel with or without launch bounds. More...
    +  struct  LinearScaling  Functor to compute linear combination of fragments. More...
      +struct  LinearScalingDevicePtr +  struct  ProjectOperand   struct  ProjectOperand< GemmOperand::kA, Kstrided > @@ -264,26 +288,39 @@ Classes   struct  ReshapeThreads< Tile_, Threads_, true >   +struct  RowMajorBlockSwizzle +  struct  SgemmConfig   +struct  SgemmLBTraits + Helper to define SGEMM traits using Launch Bounds. More...
    +  struct  SgemmTraits   struct  SharedLoadStream   +struct  SharedStreamPair + Collect the global load streams for multiplicands. More...
    +  struct  SimplifiedGemmEpilogueTraits   struct  SimplifiedGemmTraits   struct  SimplifiedGemmTraitsHelper   +struct  swizzleDirection +  struct  ThreadMultiplyAdd  Template performing matrix multiply-add operation within a thread. More...
      -struct  ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > - Template performing matrix multiply-add operation within a thread. More...
    +struct  ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float > + Template performing matrix multiply-add operation within a thread. More...
      -struct  ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > - Template performing matrix multiply-add operation within a thread. More...
    +struct  ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, half > + Template performing matrix multiply-add operation within a thread. More...
    +  +struct  ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, int8_t, int8_t, int > + Template performing matrix multiply-add operation within a thread. More...
      struct  WmmaGemmGlobalIteratorCd   @@ -292,18 +329,29 @@ Classes - - - + + + + + + + + + + + + + +

    Functions

    template<typename Gemm_ >
    __global__ void gemm_kernel (typename Gemm_::Params params)
     
    template<typename Gemm_ >
    __global__ __launch_bounds__ (Gemm_::kThreads) void gemm_kernel(typename Gemm_
     GEMM kernel with launch bounds specified. More...
     
    template<typename Gemm_ >
    __global__ void gemm_kernel_nolb (typename Gemm_::Params params)
     GEMM kernel without launch bounds specified. More...
     
    template<typename T >
    CUTLASS_DEVICE bool is_zero (T x)
     
    CUTLASS_DEVICE bool is_zero (half x)
     
    template<enum swizzleDirection::Kind >
    CUTLASS_DEVICE int getLinearIdx (int groups)
     
    template<>
    CUTLASS_DEVICE int getLinearIdx< swizzleDirection::Boustrophedon > (int groups)
     

    Function Documentation

    - -

    ◆ gemm_kernel()

    + +

    ◆ __launch_bounds__()

    @@ -311,7 +359,27 @@ Functions template<typename Gemm_ >
    - + + + + + + +
    __global__ void cutlass::gemm::gemm_kernel __global__ cutlass::gemm::__launch_bounds__ (Gemm_::kThreads )
    +
    + +
    + + +

    ◆ gemm_kernel_nolb()

    + +
    +
    +
    +template<typename Gemm_ >
    + + + @@ -320,6 +388,46 @@ template<typename Gemm_ >
    __global__ void cutlass::gemm::gemm_kernel_nolb ( typename Gemm_::Params  params)
    +
    +
    + +

    ◆ getLinearIdx()

    + +
    +
    +
    +template<enum swizzleDirection::Kind >
    + + + + + + + + +
    CUTLASS_DEVICE int cutlass::gemm::getLinearIdx (int groups)
    +
    + +
    +
    + +

    ◆ getLinearIdx< swizzleDirection::Boustrophedon >()

    + +
    +
    +
    +template<>
    + + + + + + + + +
    CUTLASS_DEVICE int cutlass::gemm::getLinearIdx< swizzleDirection::Boustrophedon > (int groups)
    +
    +
    @@ -363,7 +471,7 @@ template<typename T > diff --git a/docs/namespacecutlass_1_1platform.html b/docs/namespacecutlass_1_1platform.html index 2bf30c0df..b62a896a7 100644 --- a/docs/namespacecutlass_1_1platform.html +++ b/docs/namespacecutlass_1_1platform.html @@ -122,6 +122,8 @@ Classes struct  bool_constant  std::bool_constant More...
      +class  complex +  struct  conditional  std::conditional (true specialization) More...
      @@ -256,6 +258,157 @@ Typedefs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -399,8 +552,8 @@ template<> - -

    ◆ __align__() [3/13]

    + +

    ◆ __align__() [3/13]

    @@ -410,7 +563,7 @@ template<>
    - + @@ -419,8 +572,8 @@ template<> - -

    ◆ __align__() [4/13]

    + +

    ◆ __align__() [4/13]

    @@ -430,7 +583,7 @@ template<>
    - + @@ -439,8 +592,8 @@ template<> - -

    ◆ __align__() [5/13]

    + +

    ◆ __align__() [5/13]

    @@ -450,7 +603,7 @@ template<>
    - + @@ -477,10 +630,90 @@ template<>

    Functions

    CUTLASS_HOST_DEVICE float const & real (cuFloatComplex const &z)
     Returns the real part of the complex number. More...
     
    CUTLASS_HOST_DEVICE float & real (cuFloatComplex &z)
     Returns the real part of the complex number. More...
     
    CUTLASS_HOST_DEVICE double const & real (cuDoubleComplex const &z)
     Returns the real part of the complex number. More...
     
    CUTLASS_HOST_DEVICE double & real (cuDoubleComplex &z)
     Returns the real part of the complex number. More...
     
    CUTLASS_HOST_DEVICE float const & imag (cuFloatComplex const &z)
     Returns the imaginary part of the complex number. More...
     
    CUTLASS_HOST_DEVICE float & imag (cuFloatComplex &z)
     Returns the imaginary part of the complex number. More...
     
    CUTLASS_HOST_DEVICE double const & imag (cuDoubleComplex const &z)
     Returns the imaginary part of the complex number. More...
     
    CUTLASS_HOST_DEVICE double & imag (cuDoubleComplex &z)
     Returns the imaginary part of the complex number. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE T const & real (complex< T > const &z)
     Returns the real part of the complex number. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE T & real (complex< T > &z)
     Returns the real part of the complex number. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE T const & imag (complex< T > const &z)
     Returns the imaginary part of the complex number. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE T & imag (complex< T > &z)
     Returns the imaginary part of the complex number. More...
     
    template<typename T >
    std::ostream & operator<< (std::ostream &out, complex< T > const &z)
     
    template<typename T >
    CUTLASS_HOST_DEVICE bool operator== (complex< T > const &lhs, complex< T > const &rhs)
     Equality operator. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE bool operator!= (complex< T > const &lhs, complex< T > const &rhs)
     Inequality operator. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator+ (complex< T > const &lhs, complex< T > const &rhs)
     Addition. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator- (complex< T > const &lhs, complex< T > const &rhs)
     Subtraction. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator* (complex< T > const &lhs, complex< T > const &rhs)
     Multiplication. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator* (complex< T > const &lhs, T const &s)
     Scalar Multiplication. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator* (T const &s, complex< T > const &rhs)
     Scalar Multiplication. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator/ (complex< T > const &lhs, complex< T > const &rhs)
     Division. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator/ (complex< T > const &lhs, T const &s)
     Scalar Division. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > operator/ (T const &s, complex< T > const &rhs)
     Scalar divided by complex. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > & operator+= (complex< T > &lhs, complex< T > const &rhs)
     Addition. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > & operator-= (complex< T > &lhs, complex< T > const &rhs)
     Subtraction. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > & operator*= (complex< T > &lhs, complex< T > const &rhs)
     Multiplication. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > & operator*= (complex< T > &lhs, T s)
     Scalar multiplication. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > & operator/= (complex< T > &lhs, complex< T > const &rhs)
     Division. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICEabs (complex< T > const &z)
     Returns the magnitude of the complex number. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICEarg (complex< T > const &z)
     Returns the magnitude of the complex number. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICEnorm (complex< T > const &z)
     Returns the squared magnitude. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > conj (complex< T > const &z)
     Returns the complex conjugate. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > proj (complex< T > const &z)
     Projects the complex number z onto the Riemann sphere. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > polar (T const &r, T const &theta=T())
     Returns a complex number with magnitude r and phase theta. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > exp (complex< T > const &z)
     Computes the complex exponential of z. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > log (complex< T > const &z)
     Computes the complex exponential of z. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > log10 (complex< T > const &z)
     Computes the complex exponential of z. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > sqrt (complex< T > const &z)
     Computes the square root of complex number z. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > cos (complex< T > const &z)
     Computes the cosine of complex z. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE complex< T > sin (complex< T > const &z)
     Computes the sin of complex z. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & min (const T &a, const T &b)
     std::min More...
    struct cutlass::platform::__align__ (64  )
    struct cutlass::platform::__align__ (128  )
    struct cutlass::platform::__align__ (256 16  )
    +
    + + +

    ◆ __align__() [7/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (32 )
    +
    + +
    +
    + +

    ◆ __align__() [8/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (64 )
    +
    + +
    +
    + +

    ◆ __align__() [9/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (128 )
    +
    + +
    +
    + +

    ◆ __align__() [10/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (256 )
    +
    +
    -

    ◆ __align__() [7/13]

    +

    ◆ __align__() [11/13]

    @@ -500,7 +733,7 @@ template<>
    -

    ◆ __align__() [8/13]

    +

    ◆ __align__() [12/13]

    @@ -520,7 +753,7 @@ template<>
    -

    ◆ __align__() [9/13]

    +

    ◆ __align__() [13/13]

    @@ -539,19 +772,19 @@ template<>
    - -

    ◆ __align__() [10/13]

    + +

    ◆ abs()

    -template<>
    +template<typename T >
    - + - - + +
    struct cutlass::platform::__align__ CUTLASS_HOST_DEVICE T cutlass::platform::abs (32 )complex< T > const & z)
    @@ -559,19 +792,19 @@ template<>
    - -

    ◆ __align__() [11/13]

    + +

    ◆ arg()

    -template<>
    +template<typename T >
    - + - - + +
    struct cutlass::platform::__align__ CUTLASS_HOST_DEVICE T cutlass::platform::arg ()complex< T > const & z)
    @@ -579,19 +812,19 @@ template<>
    - -

    ◆ __align__() [12/13]

    + +

    ◆ conj()

    -template<>
    +template<typename T >
    - + - - + +
    struct cutlass::platform::__align__ CUTLASS_HOST_DEVICE complex<T> cutlass::platform::conj ()complex< T > const & z)
    @@ -599,19 +832,191 @@ template<>
    - -

    ◆ __align__() [13/13]

    + +

    ◆ cos()

    -template<>
    +template<typename T >
    - + - - + + + + +
    struct cutlass::platform::__align__ CUTLASS_HOST_DEVICE complex<T> cutlass::platform::cos (16 )complex< T > const & z)
    +
    + +
    + + +

    ◆ exp()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::exp (complex< T > const & z)
    +
    + +
    +
    + +

    ◆ imag() [1/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE float const& cutlass::platform::imag (cuFloatComplex const & z)
    +
    + +
    +
    + +

    ◆ imag() [2/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE float& cutlass::platform::imag (cuFloatComplex & z)
    +
    + +
    +
    + +

    ◆ imag() [3/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE double const& cutlass::platform::imag (cuDoubleComplex const & z)
    +
    + +
    +
    + +

    ◆ imag() [4/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE double& cutlass::platform::imag (cuDoubleComplex & z)
    +
    + +
    +
    + +

    ◆ imag() [5/6]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE T const& cutlass::platform::imag (complex< T > const & z)
    +
    + +
    +
    + +

    ◆ imag() [6/6]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE T& cutlass::platform::imag (complex< T > & z)
    +
    + +
    +
    + +

    ◆ log()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::log (complex< T > const & z)
    +
    + +
    +
    + +

    ◆ log10()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::log10 (complex< T > const & z)
    @@ -707,10 +1112,30 @@ template<typename T >
    +
    + + +

    ◆ norm()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE T cutlass::platform::norm (complex< T > const & z)
    +
    +
    -

    ◆ operator!=()

    +

    ◆ operator!=() [1/2]

    @@ -737,6 +1162,426 @@ template<class T1 , class T2 >
    +
    + + +

    ◆ operator!=() [2/2]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::platform::operator!= (complex< T > const & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator*() [1/3]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator* (complex< T > const & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator*() [2/3]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator* (complex< T > const & lhs,
    T const & s 
    )
    +
    + +
    +
    + +

    ◆ operator*() [3/3]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator* (T const & s,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator*=() [1/2]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T>& cutlass::platform::operator*= (complex< T > & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator*=() [2/2]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T>& cutlass::platform::operator*= (complex< T > & lhs,
    s 
    )
    +
    + +
    +
    + +

    ◆ operator+()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator+ (complex< T > const & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator+=()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T>& cutlass::platform::operator+= (complex< T > & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator-()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator- (complex< T > const & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator-=()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T>& cutlass::platform::operator-= (complex< T > & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator/() [1/3]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator/ (complex< T > const & lhs,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator/() [2/3]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator/ (complex< T > const & lhs,
    T const & s 
    )
    +
    + +
    +
    + +

    ◆ operator/() [3/3]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::operator/ (T const & s,
    complex< T > const & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator/=()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T>& cutlass::platform::operator/= (complex< T > & lhs,
    complex< T > const & rhs 
    )
    +
    +
    @@ -767,6 +1612,36 @@ template<class T1 , class T2 >
    +
    + + +

    ◆ operator<<()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    std::ostream& cutlass::platform::operator<< (std::ostream & out,
    complex< T > const & z 
    )
    +
    +
    @@ -797,10 +1672,40 @@ template<class T1 , class T2 >
    +
    + + +

    ◆ operator==() [1/2]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::platform::operator== (complex< T > const & lhs,
    complex< T > const & rhs 
    )
    +
    +
    -

    ◆ operator==()

    +

    ◆ operator==() [2/2]

    @@ -887,6 +1792,208 @@ template<class T1 , class T2 >
    +
    + + +

    ◆ polar()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::polar (T const & r,
    T const & theta = T() 
    )
    +
    + +
    +
    + +

    ◆ proj()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::proj (complex< T > const & z)
    +
    + +
    +
    + +

    ◆ real() [1/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE float const& cutlass::platform::real (cuFloatComplex const & z)
    +
    + +
    +
    + +

    ◆ real() [2/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE float& cutlass::platform::real (cuFloatComplex & z)
    +
    + +
    +
    + +

    ◆ real() [3/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE double const& cutlass::platform::real (cuDoubleComplex const & z)
    +
    + +
    +
    + +

    ◆ real() [4/6]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE double& cutlass::platform::real (cuDoubleComplex & z)
    +
    + +
    +
    + +

    ◆ real() [5/6]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE T const& cutlass::platform::real (complex< T > const & z)
    +
    + +
    +
    + +

    ◆ real() [6/6]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE T& cutlass::platform::real (complex< T > & z)
    +
    + +
    +
    + +

    ◆ sin()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::sin (complex< T > const & z)
    +
    + +
    +
    + +

    ◆ sqrt()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_HOST_DEVICE complex<T> cutlass::platform::sqrt (complex< T > const & z)
    +
    +
    @@ -930,7 +2037,7 @@ template<typename T , typename Deleter > diff --git a/docs/namespacemembers.html b/docs/namespacemembers.html index 9566721d1..a522eab71 100644 --- a/docs/namespacemembers.html +++ b/docs/namespacemembers.html @@ -73,22 +73,54 @@ $(function() {

    - _ -

    + + +

    - a -

    - c -

    +

    - e -

    + +

    - f -

    @@ -96,36 +128,30 @@ $(function() {
  • gcd() : cutlass
  • -
  • gemm_kernel() -: cutlass::gemm +
  • gemm_kernel_nolb() +: cutlass::gemm
  • -
  • get_Coord_dhw() -: cutlass +
  • getLinearIdx() +: cutlass::gemm
  • -
  • get_Coord_hw() -: cutlass -
  • -
  • get_Coord_hwc() -: cutlass +
  • getLinearIdx< swizzleDirection::Boustrophedon >() +: cutlass::gemm
  • - i -

    @@ -134,6 +160,12 @@ $(function() {
  • lcm() : cutlass
  • +
  • log() +: cutlass::platform +
  • +
  • log10() +: cutlass::platform +
  • @@ -141,11 +173,23 @@ $(function() {
  • make_Coord() : cutlass
  • +
  • make_Coord_from_shape() +: cutlass +
  • make_pair() : cutlass::platform
  • make_zero() -: cutlass +: cutlass +
  • +
  • make_ZipConvert() +: cutlass +
  • +
  • make_ZipFragment() +: cutlass +
  • +
  • make_ZipTensorRef() +: cutlass
  • max() : cutlass::platform @@ -156,18 +200,53 @@ $(function() { +

    - n -

    + +

    - o -

    +

    - p -

    + +

    - r -

    +

    - n -

    + +

    - o -

    +

    - p -

    + +

    - r -