mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-05-11 17:00:05 +00:00
Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. (#100)
- Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>.
- Enhancement to CUTLASS Utility Library's HostTensorPlanarComplex template to support copy-in and copy-out.
- Added test_examples target to build and test all CUTLASS examples.
- Minor edits to documentation to point to GTC 2020 webinar.
This commit is contained in:
@@ -20,15 +20,9 @@
|
||||
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
cutlass_add_executable(
|
||||
cutlass_example_add_executable(
|
||||
03_visualize_layout
|
||||
visualize_layout.cpp
|
||||
register_layout.cu
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
03_visualize_layout
|
||||
PRIVATE
|
||||
CUTLASS
|
||||
cutlass_tools_util_includes
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user