mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 12:41:26 +00:00
[CK] Fix windows build issues ## Motivation Full build on Windows is currently broken due to compiler errors; this PR should help fix that. This is also holding up the following PR in TheRock: https://github.com/ROCm/TheRock/pull/3382 ## Technical Details 1. I don't see a good reason to be nesting a windows include inside the ck_tile namespace. It was causing compiler errors too: Windows.h comes with min and max, which were conflicting with ck_tile::min and ck_tile::max, so I moved it out. I also defined NOMINMAX to prevent this inclusion in the future. 2. The TRUE/FALSE macros are already used by Windows.h, which causes an error. So I've opted for True/False. You can see this pattern in other rocm-libraries. 3. The M_PI macro isn't available, at least in the WIN32_LEAN_AND_MEAN context, from \<cmath\> on Windows. We'll be able to use std::numbers::pi_v\<float\> when we have C++20 support. 4. There was a missing \<chrono\> include. ## Test Plan Test locally and make sure this doesn't impact existing CI. ## Test Result Compiles locally and passes existing CI. ## Submission Checklist - [ x ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
90 lines
2.9 KiB
C++
90 lines
2.9 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
|
|
#include "ck_tile/core.hpp"
#include "ck_tile/host/host_tensor.hpp"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iterator>
#include <optional>
#include <random>
#include <tuple>
#include <utility>

#ifndef M_PI // Not there on windows...
#define M_PI 3.141592653589793238462643383279502884
#endif
|
|
|
|
// Selects which rotary-embedding variant (if any) is applied.
// The numeric values must stay in sync with RotaryEmbeddingEnum (defined
// elsewhere), so they are spelled out explicitly rather than left implicit.
enum class rope_enum
{
    none         = 0, // RoPE disabled
    interleaved  = 1,
    half_rotated = 2
};
|
|
|
|
template <typename DataType>
|
|
std::tuple<ck_tile::HostTensor<DataType>, ck_tile::HostTensor<DataType>>
|
|
generate_rotary_cos_sin(ck_tile::index_t seqlen,
|
|
ck_tile::index_t rotary_dim,
|
|
std::optional<unsigned> seed = std::nullopt)
|
|
{
|
|
// return dummy tensors if we won't apply RoPE at all
|
|
if(rotary_dim <= 0)
|
|
{
|
|
ck_tile::HostTensor<DataType> dummy({1, 1});
|
|
return std::make_tuple(dummy, dummy);
|
|
}
|
|
|
|
std::mt19937 random_engine(seed.has_value() ? *seed : std::random_device{}());
|
|
std::uniform_real_distribution<float> generator(0.0f, 1.0f);
|
|
|
|
const ck_tile::index_t num_rows = seqlen * 2;
|
|
const ck_tile::index_t num_cols = rotary_dim / 2;
|
|
|
|
using std::begin, std::end;
|
|
|
|
ck_tile::HostTensor<float> angle({num_rows, num_cols});
|
|
std::generate(begin(angle), end(angle), [&] { return generator(random_engine) * 2 * M_PI; });
|
|
|
|
ck_tile::HostTensor<DataType> cos({num_rows, num_cols});
|
|
std::transform(begin(angle), end(angle), begin(cos), [](float origin_value) {
|
|
return ck_tile::type_convert<DataType>(std::cos(origin_value));
|
|
});
|
|
|
|
ck_tile::HostTensor<DataType> sin({num_rows, num_cols});
|
|
std::transform(begin(angle), end(angle), begin(sin), [](float origin_value) {
|
|
return ck_tile::type_convert<DataType>(std::sin(origin_value));
|
|
});
|
|
|
|
return std::make_tuple(cos, sin);
|
|
}
|
|
|
|
template <typename DataType>
|
|
std::tuple<ck_tile::HostTensor<DataType>, ck_tile::HostTensor<DataType>>
|
|
slice_rotary_cos_sin(const ck_tile::HostTensor<DataType>& cos,
|
|
const ck_tile::HostTensor<DataType>& sin,
|
|
ck_tile::index_t seqlen_offset,
|
|
ck_tile::index_t seqlen)
|
|
{
|
|
assert(cos.get_num_of_dimension() == 2 && sin.get_num_of_dimension() == 2);
|
|
assert(cos.get_length(0) == sin.get_length(0) && cos.get_length(1) == sin.get_length(1));
|
|
|
|
assert(static_cast<std::size_t>(seqlen_offset + seqlen) <= cos.get_length(0));
|
|
|
|
const ck_tile::index_t num_rows = seqlen;
|
|
const ck_tile::index_t num_cols = cos.get_length(1);
|
|
|
|
ck_tile::HostTensor<DataType> cos_pt({num_rows, num_cols});
|
|
cos_pt.ForEach([&](auto& self, auto i) { self(i) = cos(i[0] + seqlen_offset, i[1]); });
|
|
|
|
ck_tile::HostTensor<DataType> sin_pt({num_rows, num_cols});
|
|
sin_pt.ForEach([&](auto& self, auto i) { self(i) = sin(i[0] + seqlen_offset, i[1]); });
|
|
|
|
return std::make_tuple(cos_pt, sin_pt);
|
|
}
|