Skip to content

Commit dcc0ca1

Browse files
committed
Fix for GCC 15 compiler error on PPC8/PPC9/PPC10
1 parent fdf177d commit dcc0ca1

File tree

1 file changed

+103
-64
lines changed

1 file changed

+103
-64
lines changed

hwy/ops/ppc_vsx-inl.h

Lines changed: 103 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -3744,16 +3744,73 @@ static HWY_INLINE V VsxF2INormalizeSrcVals(V v) {
37443744
#endif
37453745
}
37463746

3747+
// Converts F32 lanes of vf32 to I64 lanes using the xvcvspsxds conversion (or
// an equivalent intrinsic) and returns the result as an I64 vector with the
// same total size as VF32. The implementation is selected at compile time:
//   - __builtin_vsx_xvcvspsxds when the compiler provides it,
//   - vec_signedo (little-endian) or vec_signede (big-endian) on GCC 15+,
//   - inline assembly emitting xvcvspsxds otherwise.
// As noted at the call sites, the source values are expected to be in the odd
// F32 lanes on little-endian PPC and in the even F32 lanes on big-endian PPC.
template <class VF32>
3748+
static HWY_INLINE HWY_MAYBE_UNUSED VFromD<Repartition<int64_t, DFromV<VF32>>>
3749+
VsxXvcvspsxds(VF32 vf32) {
3750+
using VI64 = VFromD<Repartition<int64_t, DFromV<VF32>>>;
3751+
#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1500) || \
3752+
HWY_HAS_BUILTIN(__builtin_vsx_xvcvspsxds)
3753+
// Use __builtin_vsx_xvcvspsxds if it is available (which is the case with
3754+
// GCC 4.8 through GCC 14 or Clang 13 or later on PPC8/PPC9/PPC10)
3755+
return VI64{__builtin_vsx_xvcvspsxds(vf32.raw)};
3756+
#elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_LITTLE_ENDIAN
3757+
// On little-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->I64
3758+
// vec_signedo intrinsic as the __builtin_vsx_xvcvspsxds intrinsic has been
3759+
// removed from GCC in GCC 15
3760+
return VI64{vec_signedo(vf32.raw)};
3761+
#elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_BIG_ENDIAN
3762+
// On big-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->I64
3763+
// vec_signede intrinsic as the __builtin_vsx_xvcvspsxds intrinsic has been
3764+
// removed from GCC in GCC 15
3765+
return VI64{vec_signede(vf32.raw)};
3766+
#else
3767+
// Inline assembly fallback for older versions of Clang that do not have the
3768+
// __builtin_vsx_xvcvspsxds intrinsic
3769+
__vector signed long long raw_result;
3770+
__asm__("xvcvspsxds %x0, %x1" : "=wa"(raw_result) : "wa"(vf32.raw) :);
3771+
return VI64{raw_result};
3772+
#endif
3773+
}
3774+
3775+
// Converts F32 lanes of vf32 to U64 lanes using the xvcvspuxds conversion (or
// an equivalent intrinsic) and returns the result as a U64 vector with the
// same total size as VF32. The implementation is selected at compile time:
//   - __builtin_vsx_xvcvspuxds when the compiler provides it (its result is
//     cast to __vector unsigned long long),
//   - vec_unsignedo (little-endian) or vec_unsignede (big-endian) on GCC 15+,
//   - inline assembly emitting xvcvspuxds otherwise.
// As noted at the call sites, the source values are expected to be in the odd
// F32 lanes on little-endian PPC and in the even F32 lanes on big-endian PPC.
template <class VF32>
3776+
static HWY_INLINE HWY_MAYBE_UNUSED VFromD<Repartition<uint64_t, DFromV<VF32>>>
3777+
VsxXvcvspuxds(VF32 vf32) {
3778+
using VU64 = VFromD<Repartition<uint64_t, DFromV<VF32>>>;
3779+
#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1500) || \
3780+
HWY_HAS_BUILTIN(__builtin_vsx_xvcvspuxds)
3781+
// Use __builtin_vsx_xvcvspuxds if it is available (which is the case with
3782+
// GCC 4.8 through GCC 14 or Clang 13 or later on PPC8/PPC9/PPC10)
3783+
return VU64{reinterpret_cast<__vector unsigned long long>(
3784+
__builtin_vsx_xvcvspuxds(vf32.raw))};
3785+
#elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_LITTLE_ENDIAN
3786+
// On little-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->U64
3787+
// vec_unsignedo intrinsic as the __builtin_vsx_xvcvspuxds intrinsic has been
3788+
// removed from GCC in GCC 15
3789+
return VU64{vec_unsignedo(vf32.raw)};
3790+
#elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_BIG_ENDIAN
3791+
// On big-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->U64
3792+
// vec_unsignede intrinsic as the __builtin_vsx_xvcvspuxds intrinsic has been
3793+
// removed from GCC in GCC 15
3794+
return VU64{vec_unsignede(vf32.raw)};
3795+
#else
3796+
// Inline assembly fallback for older versions of Clang that do not have the
3797+
// __builtin_vsx_xvcvspuxds intrinsic
3798+
__vector unsigned long long raw_result;
3799+
__asm__("xvcvspuxds %x0, %x1" : "=wa"(raw_result) : "wa"(vf32.raw) :);
3800+
return VU64{raw_result};
3801+
#endif
3802+
}
3803+
37473804
} // namespace detail
37483805
#endif // !HWY_S390X_HAVE_Z14
37493806

37503807
template <class D, HWY_IF_I64_D(D)>
37513808
HWY_API VFromD<D> PromoteTo(D di64, VFromD<Rebind<float, D>> v) {
3752-
#if !HWY_S390X_HAVE_Z14 && \
3753-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspsxds))
3754-
const __vector float raw_v =
3755-
detail::VsxF2INormalizeSrcVals(InterleaveLower(v, v)).raw;
3756-
return VFromD<decltype(di64)>{__builtin_vsx_xvcvspsxds(raw_v)};
3809+
#if !HWY_S390X_HAVE_Z14
3810+
const Repartition<float, decltype(di64)> dt_f32;
3811+
const auto vt_f32 = ResizeBitCast(dt_f32, v);
3812+
return detail::VsxXvcvspsxds(
3813+
detail::VsxF2INormalizeSrcVals(InterleaveLower(vt_f32, vt_f32)));
37573814
#else
37583815
const RebindToFloat<decltype(di64)> df64;
37593816
return ConvertTo(di64, PromoteTo(df64, v));
@@ -3762,12 +3819,11 @@ HWY_API VFromD<D> PromoteTo(D di64, VFromD<Rebind<float, D>> v) {
37623819

37633820
template <class D, HWY_IF_U64_D(D)>
37643821
HWY_API VFromD<D> PromoteTo(D du64, VFromD<Rebind<float, D>> v) {
3765-
#if !HWY_S390X_HAVE_Z14 && \
3766-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspuxds))
3767-
const __vector float raw_v =
3768-
detail::VsxF2INormalizeSrcVals(InterleaveLower(v, v)).raw;
3769-
return VFromD<decltype(du64)>{reinterpret_cast<__vector unsigned long long>(
3770-
__builtin_vsx_xvcvspuxds(raw_v))};
3822+
#if !HWY_S390X_HAVE_Z14
3823+
const Repartition<float, decltype(du64)> dt_f32;
3824+
const auto vt_f32 = ResizeBitCast(dt_f32, v);
3825+
return detail::VsxXvcvspuxds(
3826+
detail::VsxF2INormalizeSrcVals(InterleaveLower(vt_f32, vt_f32)));
37713827
#else
37723828
const RebindToFloat<decltype(du64)> df64;
37733829
return ConvertTo(du64, PromoteTo(df64, v));
@@ -3876,12 +3932,10 @@ HWY_API VFromD<D> PromoteUpperTo(D df64, Vec128<uint32_t> v) {
38763932

38773933
template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I64_D(D)>
38783934
HWY_API VFromD<D> PromoteUpperTo(D di64, Vec128<float> v) {
3879-
#if !HWY_S390X_HAVE_Z14 && \
3880-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspsxds))
3881-
const __vector float raw_v =
3882-
detail::VsxF2INormalizeSrcVals(InterleaveUpper(Full128<float>(), v, v))
3883-
.raw;
3884-
return VFromD<decltype(di64)>{__builtin_vsx_xvcvspsxds(raw_v)};
3935+
#if !HWY_S390X_HAVE_Z14
3936+
(void)di64;
3937+
return detail::VsxXvcvspsxds(
3938+
detail::VsxF2INormalizeSrcVals(InterleaveUpper(Full128<float>(), v, v)));
38853939
#else
38863940
const RebindToFloat<decltype(di64)> df64;
38873941
return ConvertTo(di64, PromoteUpperTo(df64, v));
@@ -3890,13 +3944,10 @@ HWY_API VFromD<D> PromoteUpperTo(D di64, Vec128<float> v) {
38903944

38913945
template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U64_D(D)>
38923946
HWY_API VFromD<D> PromoteUpperTo(D du64, Vec128<float> v) {
3893-
#if !HWY_S390X_HAVE_Z14 && \
3894-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspuxds))
3895-
const __vector float raw_v =
3896-
detail::VsxF2INormalizeSrcVals(InterleaveUpper(Full128<float>(), v, v))
3897-
.raw;
3898-
return VFromD<decltype(du64)>{reinterpret_cast<__vector unsigned long long>(
3899-
__builtin_vsx_xvcvspuxds(raw_v))};
3947+
#if !HWY_S390X_HAVE_Z14
3948+
(void)du64;
3949+
return detail::VsxXvcvspuxds(
3950+
detail::VsxF2INormalizeSrcVals(InterleaveUpper(Full128<float>(), v, v)));
39003951
#else
39013952
const RebindToFloat<decltype(du64)> df64;
39023953
return ConvertTo(du64, PromoteUpperTo(df64, v));
@@ -3984,20 +4035,18 @@ HWY_INLINE VFromD<D> PromoteEvenTo(hwy::SignedTag /*to_type_tag*/,
39844035
hwy::SizeTag<8> /*to_lane_size_tag*/,
39854036
hwy::FloatTag /*from_type_tag*/, D d_to,
39864037
V v) {
3987-
#if !HWY_S390X_HAVE_Z14 && \
3988-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspsxds))
4038+
#if !HWY_S390X_HAVE_Z14
39894039
(void)d_to;
39904040
const auto normalized_v = detail::VsxF2INormalizeSrcVals(v);
39914041
#if HWY_IS_LITTLE_ENDIAN
3992-
// __builtin_vsx_xvcvspsxds expects the source values to be in the odd lanes
3993-
// on little-endian PPC, and the vec_sld operation below will shift the even
4042+
// VsxXvcvspsxds expects the source values to be in the odd lanes on
4043+
// little-endian PPC, and the Shuffle2103 operation below will shift the even
39944044
// lanes of normalized_v into the odd lanes.
3995-
return VFromD<D>{
3996-
__builtin_vsx_xvcvspsxds(vec_sld(normalized_v.raw, normalized_v.raw, 4))};
4045+
return VsxXvcvspsxds(Shuffle2103(normalized_v));
39974046
#else
3998-
// __builtin_vsx_xvcvspsxds expects the source values to be in the even lanes
3999-
// on big-endian PPC.
4000-
return VFromD<D>{__builtin_vsx_xvcvspsxds(normalized_v.raw)};
4047+
// VsxXvcvspsxds expects the source values to be in the even lanes on
4048+
// big-endian PPC.
4049+
return VsxXvcvspsxds(normalized_v);
40014050
#endif
40024051
#else
40034052
const RebindToFloat<decltype(d_to)> df64;
@@ -4012,22 +4061,18 @@ HWY_INLINE VFromD<D> PromoteEvenTo(hwy::UnsignedTag /*to_type_tag*/,
40124061
hwy::SizeTag<8> /*to_lane_size_tag*/,
40134062
hwy::FloatTag /*from_type_tag*/, D d_to,
40144063
V v) {
4015-
#if !HWY_S390X_HAVE_Z14 && \
4016-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspuxds))
4064+
#if !HWY_S390X_HAVE_Z14
40174065
(void)d_to;
40184066
const auto normalized_v = detail::VsxF2INormalizeSrcVals(v);
40194067
#if HWY_IS_LITTLE_ENDIAN
4020-
// __builtin_vsx_xvcvspuxds expects the source values to be in the odd lanes
4021-
// on little-endian PPC, and the vec_sld operation below will shift the even
4022-
// lanes of normalized_v into the odd lanes.
4023-
return VFromD<D>{
4024-
reinterpret_cast<__vector unsigned long long>(__builtin_vsx_xvcvspuxds(
4025-
vec_sld(normalized_v.raw, normalized_v.raw, 4)))};
4068+
// VsxXvcvspuxds expects the source values to be in the odd lanes
4069+
// on little-endian PPC, and the Shuffle2103 operation below will shift the
4070+
// even lanes of normalized_v into the odd lanes.
4071+
return VsxXvcvspuxds(Shuffle2103(normalized_v));
40264072
#else
4027-
// __builtin_vsx_xvcvspuxds expects the source values to be in the even lanes
4073+
// VsxXvcvspuxds expects the source values to be in the even lanes
40284074
// on big-endian PPC.
4029-
return VFromD<D>{reinterpret_cast<__vector unsigned long long>(
4030-
__builtin_vsx_xvcvspuxds(normalized_v.raw))};
4075+
return VsxXvcvspuxds(normalized_v);
40314076
#endif
40324077
#else
40334078
const RebindToFloat<decltype(d_to)> df64;
@@ -4069,20 +4114,18 @@ HWY_INLINE VFromD<D> PromoteOddTo(hwy::SignedTag /*to_type_tag*/,
40694114
hwy::SizeTag<8> /*to_lane_size_tag*/,
40704115
hwy::FloatTag /*from_type_tag*/, D d_to,
40714116
V v) {
4072-
#if !HWY_S390X_HAVE_Z14 && \
4073-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspsxds))
4117+
#if !HWY_S390X_HAVE_Z14
40744118
(void)d_to;
40754119
const auto normalized_v = detail::VsxF2INormalizeSrcVals(v);
40764120
#if HWY_IS_LITTLE_ENDIAN
4077-
// __builtin_vsx_xvcvspsxds expects the source values to be in the odd lanes
4121+
// VsxXvcvspsxds expects the source values to be in the odd lanes
40784122
// on little-endian PPC
4079-
return VFromD<D>{__builtin_vsx_xvcvspsxds(normalized_v.raw)};
4123+
return VsxXvcvspsxds(normalized_v);
40804124
#else
4081-
// __builtin_vsx_xvcvspsxds expects the source values to be in the even lanes
4082-
// on big-endian PPC, and the vec_sld operation below will shift the odd lanes
4083-
// of normalized_v into the even lanes.
4084-
return VFromD<D>{
4085-
__builtin_vsx_xvcvspsxds(vec_sld(normalized_v.raw, normalized_v.raw, 4))};
4125+
// VsxXvcvspsxds expects the source values to be in the even lanes
4126+
// on big-endian PPC, and the Shuffle0321 operation below will shift the odd
4127+
// lanes of normalized_v into the even lanes.
4128+
return VsxXvcvspsxds(Shuffle0321(normalized_v));
40864129
#endif
40874130
#else
40884131
const RebindToFloat<decltype(d_to)> df64;
@@ -4097,22 +4140,18 @@ HWY_INLINE VFromD<D> PromoteOddTo(hwy::UnsignedTag /*to_type_tag*/,
40974140
hwy::SizeTag<8> /*to_lane_size_tag*/,
40984141
hwy::FloatTag /*from_type_tag*/, D d_to,
40994142
V v) {
4100-
#if !HWY_S390X_HAVE_Z14 && \
4101-
(HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvspuxds))
4143+
#if !HWY_S390X_HAVE_Z14
41024144
(void)d_to;
41034145
const auto normalized_v = detail::VsxF2INormalizeSrcVals(v);
41044146
#if HWY_IS_LITTLE_ENDIAN
4105-
// __builtin_vsx_xvcvspuxds expects the source values to be in the odd lanes
4147+
// VsxXvcvspuxds expects the source values to be in the odd lanes
41064148
// on little-endian PPC
4107-
return VFromD<D>{reinterpret_cast<__vector unsigned long long>(
4108-
__builtin_vsx_xvcvspuxds(normalized_v.raw))};
4149+
return VsxXvcvspuxds(normalized_v);
41094150
#else
4110-
// __builtin_vsx_xvcvspuxds expects the source values to be in the even lanes
4111-
// on big-endian PPC, and the vec_sld operation below will shift the odd lanes
4112-
// of normalized_v into the even lanes.
4113-
return VFromD<D>{
4114-
reinterpret_cast<__vector unsigned long long>(__builtin_vsx_xvcvspuxds(
4115-
vec_sld(normalized_v.raw, normalized_v.raw, 4)))};
4151+
// VsxXvcvspuxds expects the source values to be in the even lanes
4152+
// on big-endian PPC, and the Shuffle0321 operation below will shift the odd
4153+
// lanes of normalized_v into the even lanes.
4154+
return VsxXvcvspuxds(Shuffle0321(normalized_v));
41164155
#endif
41174156
#else
41184157
const RebindToFloat<decltype(d_to)> df64;

0 commit comments

Comments
 (0)