Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. (#100)

- Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>.
- Enhancement to CUTLASS Utility Library's HostTensorPlanarComplex template to support copy-in and copy-out
- Added test_examples target to build and test all CUTLASS examples
- Minor edits to documentation to point to GTC 2020 webinar
This commit is contained in:
Andrew Kerr
2020-06-15 10:47:01 -07:00
committed by GitHub
parent 86931fef85
commit 1ab1027954
11 changed files with 213 additions and 33 deletions

View File

@@ -20,15 +20,9 @@
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cutlass_add_executable(
cutlass_example_add_executable(
03_visualize_layout
visualize_layout.cpp
register_layout.cu
)
target_link_libraries(
03_visualize_layout
PRIVATE
CUTLASS
cutlass_tools_util_includes
)