Skip to content

Commit 172036d

Browse files
author
Chao Liu
committed
add c-style pointer cast
1 parent 76f3131 commit 172036d

File tree

6 files changed

+62
-34
lines changed

6 files changed

+62
-34
lines changed

composable_kernel/include/utility/amd_address_space.hpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
#include "config.hpp"
55

6+
// Address Space for AMDGCN
7+
// https://llvm.org/docs/AMDGPUUsage.html#address-space
8+
69
namespace ck {
710

811
enum AddressSpaceEnum_t
@@ -17,15 +20,24 @@ enum AddressSpaceEnum_t
1720
template <typename T>
1821
__device__ T* cast_pointer_to_generic_address_space(T CONSTANT* p)
1922
{
20-
return (T*)p;
23+
// cast a pointer in "Constant" address space (4) to "Generic" address space (0)
24+
// only old style cast seems be able to be compiled
25+
#pragma clang diagnostic ignored "-Wold-style-cast"
26+
#pragma clang diagnostic push
27+
return (T*)p; // NOLINT(old-style-cast)
28+
#pragma clang diagnostic pop
2129
}
2230

2331
template <typename T>
2432
__host__ __device__ T CONSTANT* cast_pointer_to_constant_address_space(T* p)
2533
{
26-
return (T CONSTANT*)p;
34+
// cast a pointer in "Generic" address space (0) to "Constant" address space (4)
35+
// only old style cast seems be able to be compiled
36+
#pragma clang diagnostic ignored "-Wold-style-cast"
37+
#pragma clang diagnostic push
38+
return (T CONSTANT*)p; // NOLINT(old-style-cast)
39+
#pragma clang diagnostic pop
2740
}
2841

2942
} // namespace ck
30-
3143
#endif
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef CK_C_STYLE_POINTER_CAST_HPP
2+
#define CK_C_STYLE_POINTER_CAST_HPP
3+
4+
#include "type.hpp"
5+
6+
namespace ck {
7+
8+
template <typename PY,
9+
typename PX,
10+
typename std::enable_if<is_pointer_v<PY> && is_pointer_v<PX>, bool>::type = false>
11+
__host__ __device__ PY c_style_pointer_cast(PX p_x)
12+
{
13+
#pragma clang diagnostic ignored "-Wold-style-cast"
14+
#pragma clang diagnostic push
15+
return (PY)p_x; // NOLINT(old-style-cast)
16+
#pragma clang diagnostic pop
17+
}
18+
19+
} // namespace ck
20+
#endif

composable_kernel/include/utility/common_header.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "type.hpp"
2626
#include "magic_division.hpp"
2727
#include "utility.hpp"
28+
#include "c_style_pointer_cast.hpp"
2829
#include "amd_address_space.hpp"
2930
#include "amd_buffer_addressing.hpp"
3031
#include "static_buffer.hpp"

composable_kernel/include/utility/dynamic_buffer.hpp

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
#ifndef CK_DYNAMIC_BUFFER_HPP
22
#define CK_DYNAMIC_BUFFER_HPP
33

4-
namespace ck {
5-
64
#include "amd_buffer_addressing.hpp"
5+
#include "c_style_pointer_cast.hpp"
6+
7+
namespace ck {
78

89
template <AddressSpaceEnum_t BufferAddressSpace, typename T, typename ElementSpaceSize>
910
struct DynamicBuffer
@@ -44,20 +45,20 @@ struct DynamicBuffer
4445
static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
4546
"wrong! X need to be multiple T");
4647

47-
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
48-
4948
if constexpr(GetAddressSpace() == AddressSpaceEnum_t::Global)
5049
{
5150
#if CK_USE_AMD_BUFFER_ADDRESSING
51+
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
52+
5253
return amd_buffer_load_v2<remove_cv_t<remove_reference_t<T>>, t_per_x>(
5354
p_data_, i, is_valid_offset, element_space_size_);
5455
#else
55-
return is_valid_offset ? *reinterpret_cast<const X*>(&p_data_[i]) : X{0};
56+
return is_valid_offset ? *c_style_pointer_cast<const X*>(&p_data_[i]) : X{0};
5657
#endif
5758
}
5859
else
5960
{
60-
return is_valid_offset ? *reinterpret_cast<const X*>(&p_data_[i]) : X{0};
61+
return is_valid_offset ? *c_style_pointer_cast<const X*>(&p_data_[i]) : X{0};
6162
}
6263
}
6364

@@ -78,17 +79,17 @@ struct DynamicBuffer
7879
static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
7980
"wrong! X need to be multiple T");
8081

81-
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
82-
8382
if constexpr(GetAddressSpace() == AddressSpaceEnum_t::Global)
8483
{
8584
#if CK_USE_AMD_BUFFER_ADDRESSING
85+
constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
86+
8687
amd_buffer_store_v2<remove_cv_t<remove_reference_t<T>>, t_per_x>(
8788
x, p_data_, i, is_valid_offset, element_space_size_);
8889
#else
8990
if(is_valid_offset)
9091
{
91-
*reinterpret_cast<X*>(&p_data_[i]) = x;
92+
*c_style_pointer_cast<X*>(&p_data_[i]) = x;
9293
}
9394
#endif
9495
}
@@ -97,7 +98,7 @@ struct DynamicBuffer
9798
if(is_valid_offset)
9899
{
99100
#if !CK_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE
100-
*reinterpret_cast<X*>(&p_data_[i]) = x;
101+
*c_style_pointer_cast<X*>(&p_data_[i]) = x;
101102
#else
102103
// HACK: compiler would lower IR "store<i8, 16> address_space(3)" into
103104
// inefficient
@@ -128,56 +129,56 @@ struct DynamicBuffer
128129
{
129130
// HACK: cast pointer of x is bad
130131
// TODO: remove this after compiler fix
131-
*reinterpret_cast<int8_t*>(&p_data_[i]) =
132-
*reinterpret_cast<const int8_t*>(&x);
132+
*c_style_pointer_cast<int8_t*>(&p_data_[i]) =
133+
*c_style_pointer_cast<const int8_t*>(&x);
133134
}
134135
else if constexpr(is_same<remove_cv_t<remove_reference_t<T>>, int8_t>::value &&
135136
is_same<remove_cv_t<remove_reference_t<X>>, int8x2_t>::value)
136137
{
137138
// HACK: cast pointer of x is bad
138139
// TODO: remove this after compiler fix
139-
*reinterpret_cast<int16_t*>(&p_data_[i]) =
140-
*reinterpret_cast<const int16_t*>(&x);
140+
*c_style_pointer_cast<int16_t*>(&p_data_[i]) =
141+
*c_style_pointer_cast<const int16_t*>(&x);
141142
}
142143
else if constexpr(is_same<remove_cv_t<remove_reference_t<T>>, int8_t>::value &&
143144
is_same<remove_cv_t<remove_reference_t<X>>, int8x4_t>::value)
144145
{
145146
// HACK: cast pointer of x is bad
146147
// TODO: remove this after compiler fix
147-
*reinterpret_cast<int32_t*>(&p_data_[i]) =
148-
*reinterpret_cast<const int32_t*>(&x);
148+
*c_style_pointer_cast<int32_t*>(&p_data_[i]) =
149+
*c_style_pointer_cast<const int32_t*>(&x);
149150
}
150151
else if constexpr(is_same<remove_cv_t<remove_reference_t<T>>,
151152
int8x4_t>::value &&
152153
is_same<remove_cv_t<remove_reference_t<X>>, int8x4_t>::value)
153154
{
154155
// HACK: cast pointer of x is bad
155156
// TODO: remove this after compiler fix
156-
*reinterpret_cast<int32_t*>(&p_data_[i]) =
157-
*reinterpret_cast<const int32_t*>(&x);
157+
*c_style_pointer_cast<int32_t*>(&p_data_[i]) =
158+
*c_style_pointer_cast<const int32_t*>(&x);
158159
}
159160
else if constexpr(is_same<remove_cv_t<remove_reference_t<T>>,
160161
int8x8_t>::value &&
161162
is_same<remove_cv_t<remove_reference_t<X>>, int8x8_t>::value)
162163
{
163164
// HACK: cast pointer of x is bad
164165
// TODO: remove this after compiler fix
165-
*reinterpret_cast<int32x2_t*>(&p_data_[i]) =
166-
*reinterpret_cast<const int32x2_t*>(&x);
166+
*c_style_pointer_cast<int32x2_t*>(&p_data_[i]) =
167+
*c_style_pointer_cast<const int32x2_t*>(&x);
167168
}
168169
else if constexpr(is_same<remove_cv_t<remove_reference_t<T>>,
169170
int8x16_t>::value &&
170171
is_same<remove_cv_t<remove_reference_t<X>>, int8x16_t>::value)
171172
{
172173
// HACK: cast pointer of x is bad
173174
// TODO: remove this after compiler fix
174-
*reinterpret_cast<int32x4_t*>(&p_data_[i]) =
175-
*reinterpret_cast<const int32x4_t*>(&x);
175+
*c_style_pointer_cast<int32x4_t*>(&p_data_[i]) =
176+
*c_style_pointer_cast<const int32x4_t*>(&x);
176177
}
177178
}
178179
else
179180
{
180-
*reinterpret_cast<X*>(&p_data_[i]) = x;
181+
*c_style_pointer_cast<X*>(&p_data_[i]) = x;
181182
}
182183
#endif
183184
}
@@ -186,7 +187,7 @@ struct DynamicBuffer
186187
{
187188
if(is_valid_offset)
188189
{
189-
*reinterpret_cast<X*>(&p_data_[i]) = x;
190+
*c_style_pointer_cast<X*>(&p_data_[i]) = x;
190191
}
191192
}
192193
}

composable_kernel/include/utility/type.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,7 @@ template <typename T>
2222
using remove_cv_t = typename std::remove_cv<T>::type;
2323

2424
template <typename T>
25-
constexpr std::remove_reference_t<T>&& move(T&& t) noexcept
26-
{
27-
return static_cast<typename std::remove_reference<T>::type&&>(t);
28-
}
25+
inline constexpr bool is_pointer_v = std::is_pointer<T>::value;
2926

3027
template <typename T>
3128
struct is_known_at_compile_time;

host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,9 +290,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k
290290
const auto K = out_n_ho_wo_k_lengths[I3];
291291
const auto C = wei_k_y_x_c_lengths[I3];
292292

293-
const auto Hi = in_n_hi_wi_c_lengths[I1];
294-
const auto Wi = in_n_hi_wi_c_lengths[I2];
295-
296293
const auto Ho = out_n_ho_wo_k_lengths[I1];
297294
const auto Wo = out_n_ho_wo_k_lengths[I2];
298295

0 commit comments

Comments
 (0)