diff --git a/nvbench/json_printer.cu b/nvbench/json_printer.cu index 5cfc11a..4e17359 100644 --- a/nvbench/json_printer.cu +++ b/nvbench/json_printer.cu @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -110,27 +111,45 @@ void write_named_values(JsonNode &node, const nvbench::named_values &values) // see: https://github.com/NVIDIA/nvbench/issues/255 static constexpr std::size_t preferred_buffer_nbytes = 4096; -template -void write_out_values(std::ofstream &out, const std::vector &data) +template +void swap_bytes_impl(char *p, std::index_sequence) { - static constexpr std::size_t value_nbytes = sizeof(nvbench::float32_t); + ((std::swap(p[Is], p[N - 1 - Is])), ...); +} + +template +void big_endian_to_little_endian(char *word) +{ + if constexpr (WordSize > 1) + { + static_assert((WordSize & (WordSize - 1)) == 0, "WordSize must be a power of two"); + swap_bytes_impl(word, std::make_index_sequence{}); + } +} + +template +void write_out_values_as(std::ofstream &out, const std::vector &data) +{ + static_assert(std::is_floating_point_v); + static_assert(std::is_convertible_v); + + static constexpr std::size_t value_nbytes = sizeof(StorageT); static_assert(buffer_nbytes % value_nbytes == 0); - alignas(alignof(nvbench::float32_t)) char buffer[buffer_nbytes]; + alignas(alignof(StorageT)) char buffer[buffer_nbytes]; std::size_t bytes_in_buffer = 0; for (auto value64 : data) { - const auto value32 = static_cast(value64); + const auto value = static_cast(value64); auto value_subbuffer = &buffer[bytes_in_buffer]; - std::memcpy(value_subbuffer, &value32, value_nbytes); + std::memcpy(value_subbuffer, &value, value_nbytes); // the c++17 implementation of is_little_endian isn't constexpr, but // all supported compilers optimize this branch as if it were. if (!is_little_endian()) { - std::swap(value_subbuffer[0], value_subbuffer[3]); - std::swap(value_subbuffer[1], value_subbuffer[2]); + big_endian_to_little_endian(value_subbuffer); } bytes_in_buffer += value_nbytes; @@ -149,6 +168,20 @@ void write_out_values(std::ofstream &out, const std::vector } } +// save data using statically downcasting to float32 format +template +void write_out_values_as_float32(std::ofstream &out, const std::vector &data) +{ + write_out_values_as(out, data); +} + +// save data using float64 format +template +void write_out_values_as_float64(std::ofstream &out, const std::vector &data) +{ + write_out_values_as(out, data); +} + } // end namespace namespace nvbench @@ -210,7 +243,7 @@ void json_printer::do_process_bulk_data_float64(state &state, out.exceptions(out.exceptions() | std::ios::failbit | std::ios::badbit); out.open(result_path, std::ios::binary | std::ios::out); - write_out_values(out, data); + write_out_values_as_float32(out, data); } catch (std::exception &e) { @@ -270,7 +303,7 @@ void json_printer::do_process_bulk_data_float64(state &state, out.exceptions(out.exceptions() | std::ios::failbit | std::ios::badbit); out.open(result_path, std::ios::binary | std::ios::out); - write_out_values(out, data); + write_out_values_as_float32(out, data); } catch (std::exception &e) {