mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 01:36:06 +00:00
add lds double buffer to nchw padded v4r1 and v4r4
This commit is contained in:
@@ -59,7 +59,6 @@ struct GridwiseConvolutionImplicitGemm_v4r4_nchw_kcyx_nkhw_padded
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
constexpr auto I5 = Number<5>{};
|
||||
|
||||
constexpr auto True = integral_constant<bool, true>{};
|
||||
|
||||
@@ -330,7 +329,6 @@ struct GridwiseConvolutionImplicitGemm_v4r4_nchw_kcyx_nkhw_padded
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
constexpr auto I5 = Number<5>{};
|
||||
|
||||
constexpr auto True = integral_constant<bool, true>{};
|
||||
|
||||
|
||||
@@ -25,14 +25,14 @@ namespace ck {
|
||||
// repeat-length on the merged dimension need to be 1. These sanity checks are performed
|
||||
// in constructor of BlockwiseGenericTensorSliceCopy_v1
|
||||
template <index_t BlockSize,
|
||||
class SrcDesc,
|
||||
class DstDesc,
|
||||
class SliceLengths,
|
||||
class SubLengths,
|
||||
class ThreadClusterLengths,
|
||||
class ThreadClusterArrangeOrder,
|
||||
class SrcDimAccessOrder,
|
||||
class DstDimAccessOrder,
|
||||
typename SrcDesc,
|
||||
typename DstDesc,
|
||||
typename SliceLengths,
|
||||
typename SubLengths,
|
||||
typename ThreadClusterLengths,
|
||||
typename ThreadClusterArrangeOrder,
|
||||
typename SrcDimAccessOrder,
|
||||
typename DstDimAccessOrder,
|
||||
index_t SrcVectorAccessDim,
|
||||
index_t DstVectorAccessDim,
|
||||
index_t SrcDataPerAccess,
|
||||
@@ -204,7 +204,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
|
||||
return GetRegisterBufferDescriptor().GetElementSpace();
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void RunLoadRegisterBuffer(const TData* __restrict__ p_src,
|
||||
TData* __restrict__ p_buffer) const
|
||||
{
|
||||
@@ -260,7 +260,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
|
||||
});
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void RunStoreRegisterBuffer(const TData* __restrict__ p_buffer,
|
||||
TData* __restrict__ p_dst) const
|
||||
{
|
||||
@@ -315,7 +315,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
|
||||
});
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void Run(const TData* __restrict__ p_src, TData* __restrict__ p_dst) const
|
||||
{
|
||||
TData p_buffer[GetRegisterBufferSize()];
|
||||
@@ -406,7 +406,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
|
||||
});
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
template <typename T, bool PositiveDirection>
|
||||
__device__ void
|
||||
MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
@@ -423,14 +423,14 @@ struct BlockwiseGenericTensorSliceCopy_v1
|
||||
// Slice a (normal or merged) tensor, and copy it into another (normal or merged) tensor
|
||||
// memory layout (ordering of dimensions) can be different between src and dst.
|
||||
template <index_t BlockSize,
|
||||
class SrcDesc,
|
||||
class DstDesc,
|
||||
class SliceLengths,
|
||||
class SubLengths,
|
||||
class ThreadClusterLengths,
|
||||
class ThreadClusterArrangeOrder,
|
||||
class SrcDimAccessOrder,
|
||||
class DstDimAccessOrder,
|
||||
typename SrcDesc,
|
||||
typename DstDesc,
|
||||
typename SliceLengths,
|
||||
typename SubLengths,
|
||||
typename ThreadClusterLengths,
|
||||
typename ThreadClusterArrangeOrder,
|
||||
typename SrcDimAccessOrder,
|
||||
typename DstDimAccessOrder,
|
||||
index_t SrcVectorAccessDim,
|
||||
index_t DstVectorAccessDim,
|
||||
index_t SrcDataPerAccess,
|
||||
@@ -482,19 +482,19 @@ struct BlockwiseGenericTensorSliceCopy_v2
|
||||
return RegisterBufferDesc::GetElementSpace();
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void RunLoadRegisterBuffer(const TData* p_src, TData* p_buffer) const
|
||||
{
|
||||
mThreadwiseLoad.Run(p_src, p_buffer);
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void RunStoreRegisterBuffer(const TData* p_buffer, TData* p_dst) const
|
||||
{
|
||||
mThreadwiseStore.Run(p_buffer, p_dst);
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void Run(const TData* p_src, TData* p_dst) const
|
||||
{
|
||||
TData p_buffer[GetRegisterBufferSize()];
|
||||
@@ -503,14 +503,14 @@ struct BlockwiseGenericTensorSliceCopy_v2
|
||||
mThreadwiseStore.Run(p_buffer, p_dst);
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
template <typename T, bool PositiveDirection>
|
||||
__device__ void
|
||||
MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
mThreadwiseLoad.MoveSrcSliceWindow(step_sizes, positive_direction);
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
template <typename T, bool PositiveDirection>
|
||||
__device__ void
|
||||
MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
@@ -546,14 +546,14 @@ struct BlockwiseGenericTensorSliceCopy_v2
|
||||
|
||||
// this version use TensorView and TensorCoordinate
|
||||
template <index_t BlockSize,
|
||||
class SrcTensor,
|
||||
class DstTensor,
|
||||
class SliceLengths,
|
||||
class SubLengths,
|
||||
class ThreadClusterLengths,
|
||||
class ThreadClusterArrangeOrder,
|
||||
class SrcDimAccessOrder,
|
||||
class DstDimAccessOrder,
|
||||
typename SrcTensor,
|
||||
typename DstTensor,
|
||||
typename SliceLengths,
|
||||
typename SubLengths,
|
||||
typename ThreadClusterLengths,
|
||||
typename ThreadClusterArrangeOrder,
|
||||
typename SrcDimAccessOrder,
|
||||
typename DstDimAccessOrder,
|
||||
index_t SrcVectorAccessDim,
|
||||
index_t DstVectorAccessDim,
|
||||
index_t SrcDataPerAccess,
|
||||
@@ -622,14 +622,14 @@ struct BlockwiseGenericTensorSliceCopy_v3
|
||||
mThreadwiseStore.Run();
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
template <typename T, bool PositiveDirection>
|
||||
__device__ void
|
||||
MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
mThreadwiseLoad.MoveSrcSliceWindow(step_sizes, positive_direction);
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
template <typename T, bool PositiveDirection>
|
||||
__device__ void
|
||||
MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
@@ -669,14 +669,14 @@ struct BlockwiseGenericTensorSliceCopy_v3
|
||||
};
|
||||
|
||||
template <index_t BlockSize,
|
||||
class SrcDesc,
|
||||
class DstDesc,
|
||||
class SliceLengths,
|
||||
class SubLengths,
|
||||
class ThreadClusterLengths,
|
||||
class ThreadClusterArrangeOrder,
|
||||
class SrcDimAccessOrder,
|
||||
class DstDimAccessOrder,
|
||||
typename SrcDesc,
|
||||
typename DstDesc,
|
||||
typename SliceLengths,
|
||||
typename SubLengths,
|
||||
typename ThreadClusterLengths,
|
||||
typename ThreadClusterArrangeOrder,
|
||||
typename SrcDimAccessOrder,
|
||||
typename DstDimAccessOrder,
|
||||
index_t SrcVectorAccessDim,
|
||||
index_t DstVectorAccessDim,
|
||||
index_t SrcDataPerAccess,
|
||||
@@ -727,19 +727,19 @@ struct BlockwiseGenericTensorSliceCopy_v4
|
||||
return RegisterBufferDesc::GetElementSpace();
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void RunLoadRegisterBuffer(const TData* p_src, TData* p_buffer) const
|
||||
{
|
||||
mThreadwiseLoad.Run(p_src, p_buffer);
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void RunStoreRegisterBuffer(const TData* p_buffer, TData* p_dst) const
|
||||
{
|
||||
mThreadwiseStore.Run(p_buffer, p_dst);
|
||||
}
|
||||
|
||||
template <class TData>
|
||||
template <typename TData>
|
||||
__device__ void Run(const TData* p_src, TData* p_dst) const
|
||||
{
|
||||
TData p_buffer[GetRegisterBufferSize()];
|
||||
@@ -748,16 +748,18 @@ struct BlockwiseGenericTensorSliceCopy_v4
|
||||
mThreadwiseStore.Run(p_buffer, p_dst);
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
template <typename T, bool PositiveDirection>
|
||||
__device__ void
|
||||
MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
|
||||
MoveSrcSliceWindow(const T& step_sizes,
|
||||
integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
mThreadwiseLoad.MoveSrcSliceWindow(step_sizes, positive_direction);
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
template <typename T, bool PositiveDirection>
|
||||
__device__ void
|
||||
MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
|
||||
MoveDstSliceWindow(const T& step_sizes,
|
||||
integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
mThreadwiseStore.MoveDstSliceWindow(step_sizes, positive_direction);
|
||||
}
|
||||
|
||||
@@ -1072,16 +1072,22 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
__device__ void MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
|
||||
__device__ void MoveSrcSliceWindow(const T& step_sizes_,
|
||||
integral_constant<bool, PositiveDirection>)
|
||||
{
|
||||
const auto step_sizes = to_array(step_sizes_);
|
||||
|
||||
static_if<PositiveDirection>{}([&](auto) {
|
||||
mSrcSliceOrigin += step_sizes;
|
||||
mSrcSliceOrigin += to_array(step_sizes);
|
||||
}).Else([&](auto) { mSrcSliceOrigin -= step_sizes; });
|
||||
}
|
||||
|
||||
template <class T, bool PositiveDirection>
|
||||
__device__ void MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
|
||||
__device__ void MoveDstSliceWindow(const T& step_sizes_,
|
||||
integral_constant<bool, PositiveDirection>)
|
||||
{
|
||||
const auto step_sizes = to_array(step_sizes_);
|
||||
|
||||
static_if<PositiveDirection>{}([&](auto) {
|
||||
mDstSliceOrigin += step_sizes;
|
||||
}).Else([&](auto) { mDstSliceOrigin -= step_sizes; });
|
||||
|
||||
@@ -136,7 +136,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
|
||||
for(index_t i = 0; i < nrepeat; ++i)
|
||||
{
|
||||
constexpr auto gridwise_conv =
|
||||
#if 1
|
||||
#if 0
|
||||
GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw
|
||||
#else
|
||||
GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buffer.hpp"
|
||||
|
||||
template <typename T,
|
||||
typename InDesc,
|
||||
@@ -101,44 +102,49 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded(InDesc,
|
||||
|
||||
for(index_t i = 0; i < nrepeat; ++i)
|
||||
{
|
||||
constexpr auto gridwise_conv = GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded<
|
||||
GridSize,
|
||||
BlockSize,
|
||||
T,
|
||||
decltype(in_nchw_desc),
|
||||
decltype(wei_kcyx_desc),
|
||||
decltype(out_nkhw_desc),
|
||||
ConvStrides,
|
||||
ConvDilations,
|
||||
LeftPads,
|
||||
RightPads,
|
||||
BPerBlock,
|
||||
KPerBlock,
|
||||
EPerBlock,
|
||||
GemmNRepeat,
|
||||
GemmMPerThreadSubC,
|
||||
GemmNPerThreadSubC,
|
||||
GemmMLevel0Cluster,
|
||||
GemmNLevel0Cluster,
|
||||
GemmMLevel1Cluster,
|
||||
GemmNLevel1Cluster,
|
||||
GemmKPerThreadLoop,
|
||||
GemmDataPerReadA,
|
||||
GemmDataPerReadB,
|
||||
InBlockCopySubLengths_E_N1_B_N2,
|
||||
InBlockCopyClusterLengths_E_N1_B_N2,
|
||||
InBlockCopyThreadClusterArrangeOrder,
|
||||
InBlockCopySrcAccessOrder,
|
||||
InBlockCopyDstAccessOrder,
|
||||
InBlockCopySrcDataPerRead_B,
|
||||
InBlockCopyDstDataPerWrite_N2,
|
||||
WeiBlockCopySubLengths_E_K,
|
||||
WeiBlockCopyClusterLengths_E_K,
|
||||
WeiBlockCopyThreadClusterArrangeOrder,
|
||||
WeiBlockCopySrcAccessOrder,
|
||||
WeiBlockCopyDstAccessOrder,
|
||||
WeiBlockCopySrcDataPerRead_E,
|
||||
WeiBlockCopyDstDataPerWrite_K>{};
|
||||
constexpr auto gridwise_conv =
|
||||
#if 0
|
||||
GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
|
||||
#else
|
||||
GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buffer
|
||||
#endif
|
||||
<GridSize,
|
||||
BlockSize,
|
||||
T,
|
||||
decltype(in_nchw_desc),
|
||||
decltype(wei_kcyx_desc),
|
||||
decltype(out_nkhw_desc),
|
||||
ConvStrides,
|
||||
ConvDilations,
|
||||
LeftPads,
|
||||
RightPads,
|
||||
BPerBlock,
|
||||
KPerBlock,
|
||||
EPerBlock,
|
||||
GemmNRepeat,
|
||||
GemmMPerThreadSubC,
|
||||
GemmNPerThreadSubC,
|
||||
GemmMLevel0Cluster,
|
||||
GemmNLevel0Cluster,
|
||||
GemmMLevel1Cluster,
|
||||
GemmNLevel1Cluster,
|
||||
GemmKPerThreadLoop,
|
||||
GemmDataPerReadA,
|
||||
GemmDataPerReadB,
|
||||
InBlockCopySubLengths_E_N1_B_N2,
|
||||
InBlockCopyClusterLengths_E_N1_B_N2,
|
||||
InBlockCopyThreadClusterArrangeOrder,
|
||||
InBlockCopySrcAccessOrder,
|
||||
InBlockCopyDstAccessOrder,
|
||||
InBlockCopySrcDataPerRead_B,
|
||||
InBlockCopyDstDataPerWrite_N2,
|
||||
WeiBlockCopySubLengths_E_K,
|
||||
WeiBlockCopyClusterLengths_E_K,
|
||||
WeiBlockCopyThreadClusterArrangeOrder,
|
||||
WeiBlockCopySrcAccessOrder,
|
||||
WeiBlockCopyDstAccessOrder,
|
||||
WeiBlockCopySrcDataPerRead_E,
|
||||
WeiBlockCopyDstDataPerWrite_K>{};
|
||||
|
||||
float time = launch_kernel(run_gridwise_convolution_kernel<decltype(gridwise_conv), T>,
|
||||
dim3(GridSize),
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw_padded.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw_padded_lds_double_buffer.hpp"
|
||||
|
||||
template <class T,
|
||||
class InDesc,
|
||||
@@ -166,43 +167,48 @@ void device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw_padded(InDesc,
|
||||
|
||||
printf("%s: BlockSize %u, GridSize %u \n", __func__, BlockSize, GridSize);
|
||||
|
||||
constexpr auto gridwise_conv = GridwiseConvolutionImplicitGemm_v4r4_nchw_kcyx_nkhw_padded<
|
||||
GridSize,
|
||||
BlockSize,
|
||||
T,
|
||||
decltype(in_nchw_desc),
|
||||
decltype(wei_kcyx_desc),
|
||||
decltype(out_nkhw_desc),
|
||||
ConvStrides,
|
||||
ConvDilations,
|
||||
LeftPads,
|
||||
RightPads,
|
||||
BPerBlock,
|
||||
KPerBlock,
|
||||
EPerBlock,
|
||||
GemmMPerThreadSubC,
|
||||
GemmNPerThreadSubC,
|
||||
GemmMLevel0Cluster,
|
||||
GemmNLevel0Cluster,
|
||||
GemmMLevel1Cluster,
|
||||
GemmNLevel1Cluster,
|
||||
GemmKPerThreadLoop,
|
||||
GemmDataPerReadA,
|
||||
GemmDataPerReadB,
|
||||
InBlockCopySubLengths_E_B,
|
||||
InBlockCopyClusterLengths_E_B,
|
||||
InBlockCopyThreadClusterArrangeOrder,
|
||||
InBlockCopySrcAccessOrder,
|
||||
InBlockCopyDstAccessOrder,
|
||||
InBlockCopyDataPerAccess_B,
|
||||
WeiBlockCopySubLengths_E_K,
|
||||
WeiBlockCopyClusterLengths_E_K,
|
||||
WeiBlockCopyThreadClusterArrangeOrder,
|
||||
WeiBlockCopySrcAccessOrder,
|
||||
WeiBlockCopyDstAccessOrder,
|
||||
WeiBlockCopySrcDataPerRead_E,
|
||||
WeiBlockCopyDstDataPerWrite_K,
|
||||
OutThreadCopyDataPerAccess_B>{};
|
||||
constexpr auto gridwise_conv =
|
||||
#if 0
|
||||
GridwiseConvolutionImplicitGemm_v4r4_nchw_kcyx_nkhw_padded
|
||||
#else
|
||||
GridwiseConvolutionImplicitGemm_v4r4_nchw_kcyx_nkhw_padded_lds_double_buffer
|
||||
#endif
|
||||
<GridSize,
|
||||
BlockSize,
|
||||
T,
|
||||
decltype(in_nchw_desc),
|
||||
decltype(wei_kcyx_desc),
|
||||
decltype(out_nkhw_desc),
|
||||
ConvStrides,
|
||||
ConvDilations,
|
||||
LeftPads,
|
||||
RightPads,
|
||||
BPerBlock,
|
||||
KPerBlock,
|
||||
EPerBlock,
|
||||
GemmMPerThreadSubC,
|
||||
GemmNPerThreadSubC,
|
||||
GemmMLevel0Cluster,
|
||||
GemmNLevel0Cluster,
|
||||
GemmMLevel1Cluster,
|
||||
GemmNLevel1Cluster,
|
||||
GemmKPerThreadLoop,
|
||||
GemmDataPerReadA,
|
||||
GemmDataPerReadB,
|
||||
InBlockCopySubLengths_E_B,
|
||||
InBlockCopyClusterLengths_E_B,
|
||||
InBlockCopyThreadClusterArrangeOrder,
|
||||
InBlockCopySrcAccessOrder,
|
||||
InBlockCopyDstAccessOrder,
|
||||
InBlockCopyDataPerAccess_B,
|
||||
WeiBlockCopySubLengths_E_K,
|
||||
WeiBlockCopyClusterLengths_E_K,
|
||||
WeiBlockCopyThreadClusterArrangeOrder,
|
||||
WeiBlockCopySrcAccessOrder,
|
||||
WeiBlockCopyDstAccessOrder,
|
||||
WeiBlockCopySrcDataPerRead_E,
|
||||
WeiBlockCopyDstDataPerWrite_K,
|
||||
OutThreadCopyDataPerAccess_B>{};
|
||||
|
||||
for(index_t i = 0; i < nrepeat; ++i)
|
||||
{
|
||||
|
||||
@@ -92,8 +92,8 @@ int main(int argc, char* argv[])
|
||||
// 3x3, 34x34
|
||||
constexpr index_t N = 64;
|
||||
constexpr index_t C = 256;
|
||||
constexpr index_t HI = 34;
|
||||
constexpr index_t WI = 34;
|
||||
constexpr index_t HI = 32;
|
||||
constexpr index_t WI = 32;
|
||||
constexpr index_t K = 128;
|
||||
constexpr index_t Y = 3;
|
||||
constexpr index_t X = 3;
|
||||
@@ -101,8 +101,8 @@ int main(int argc, char* argv[])
|
||||
using ConvStrides = Sequence<1, 1>;
|
||||
using ConvDilations = Sequence<1, 1>;
|
||||
|
||||
using LeftPads = Sequence<0, 0>;
|
||||
using RightPads = Sequence<0, 0>;
|
||||
using LeftPads = Sequence<1, 1>;
|
||||
using RightPads = Sequence<1, 1>;
|
||||
#elif 0
|
||||
// 1x1 filter, 8x8 image
|
||||
// cudnn@V100 68%, ck@V100 72%, ck@P100 52%, ck@VII 42%
|
||||
|
||||
Reference in New Issue
Block a user