[llvm] 1ceaec3 - [PowerPC][altivec] Optimize codegen of vec_promote
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 23 19:10:17 PDT 2023
Author: Kai Luo
Date: 2023-08-24T02:10:13Z
New Revision: 1ceaec3e81044d8a671b28d1f556045cf7fe6ef0
URL: https://github.com/llvm/llvm-project/commit/1ceaec3e81044d8a671b28d1f556045cf7fe6ef0
DIFF: https://github.com/llvm/llvm-project/commit/1ceaec3e81044d8a671b28d1f556045cf7fe6ef0.diff
LOG: [PowerPC][altivec] Optimize codegen of vec_promote
According to https://www.ibm.com/docs/en/xl-c-and-cpp-linux/16.1.1?topic=functions-vec-promote, elements not specified by the input index argument are undefined, so we don't need to set these elements to zero.
Reviewed By: nemanjai, #powerpc
Differential Revision: https://reviews.llvm.org/D158487
Added:
llvm/test/CodeGen/PowerPC/vec-promote.ll
Modified:
clang/lib/Headers/altivec.h
clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 44b5a24de89f1a..4971631c50f412 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
int __b) {
- vector signed char __res = (vector signed char)(0);
+ const vector signed char __zero = (vector signed char)0;
+ vector signed char __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_promote(unsigned char __a, int __b) {
- vector unsigned char __res = (vector unsigned char)(0);
+ const vector unsigned char __zero = (vector unsigned char)(0);
+ vector unsigned char __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
- vector short __res = (vector short)(0);
+ const vector short __zero = (vector short)(0);
+ vector short __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_promote(unsigned short __a, int __b) {
- vector unsigned short __res = (vector unsigned short)(0);
+ const vector unsigned short __zero = (vector unsigned short)(0);
+ vector unsigned short __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
- vector int __res = (vector int)(0);
+ const vector int __zero = (vector int)(0);
+ vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
int __b) {
- vector unsigned int __res = (vector unsigned int)(0);
+ const vector unsigned int __zero = (vector unsigned int)(0);
+ vector unsigned int __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
- vector float __res = (vector float)(0);
+ const vector float __zero = (vector float)(0);
+ vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
#ifdef __VSX__
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
- vector double __res = (vector double)(0);
+ const vector double __zero = (vector double)(0);
+ vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector signed long long __ATTRS_o_ai
vec_promote(signed long long __a, int __b) {
- vector signed long long __res = (vector signed long long)(0);
+ const vector signed long long __zero = (vector signed long long)(0);
+ vector signed long long __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_promote(unsigned long long __a, int __b) {
- vector unsigned long long __res = (vector unsigned long long)(0);
+ const vector unsigned long long __zero = (vector unsigned long long)(0);
+ vector unsigned long long __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
index cca2a8b2f55bd4..1fe56a820512d0 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
@@ -2232,35 +2232,45 @@ res_vuc = vec_xxsldwi(vuc, vuc, 1);
res_vd = vec_promote(d, 0);
// CHECK: store <2 x double> zeroinitializer
+// CHECK: store <2 x double> poison
// CHECK: insertelement <2 x double>
// CHECK-LE: store <2 x double> zeroinitializer
+// CHECK-LE: store <2 x double> poison
// CHECK-LE: insertelement <2 x double>
res_vsll = vec_promote(sll, 0);
// CHECK: store <2 x i64> zeroinitializer
+// CHECK: store <2 x i64> poison
// CHECK: insertelement <2 x i64>
// CHECK-LE: store <2 x i64> zeroinitializer
+// CHECK-LE: store <2 x i64> poison
// CHECK-LE: insertelement <2 x i64>
res_vull = vec_promote(ull, 0);
// CHECK: store <2 x i64> zeroinitializer
+// CHECK: store <2 x i64> poison
// CHECK: insertelement <2 x i64>
// CHECK-LE: store <2 x i64> zeroinitializer
+// CHECK-LE: store <2 x i64> poison
// CHECK-LE: insertelement <2 x i64>
res_vsc = vec_promote(asc[0], 8);
// CHECK: store <16 x i8> zeroinitializer
+// CHECK: store <16 x i8> poison
// CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
// CHECK-LE: store <16 x i8> zeroinitializer
+// CHECK-LE: store <16 x i8> poison
// CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
res_vuc = vec_promote(auc[0], 8);
// CHECK: store <16 x i8> zeroinitializer
+// CHECK: store <16 x i8> poison
// CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
// CHECK-LE: store <16 x i8> zeroinitializer
+// CHECK-LE: store <16 x i8> poison
// CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
}
diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
new file mode 100644
index 00000000000000..1fbb0e8f4205e0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=powerpc64-unknown-unknown -verify-machineinstrs -mcpu=pwr8 \
+; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs -mcpu=pwr8 \
+; RUN: < %s | FileCheck %s --check-prefix=CHECK-LE
+
+define noundef <2 x double> @vec_promote_double_zeroed(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_double_zeroed:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfd 0, 0(3)
+; CHECK-BE-NEXT: xxlxor 1, 1, 1
+; CHECK-BE-NEXT: xxmrghd 34, 0, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_double_zeroed:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lfd 0, 0(3)
+; CHECK-LE-NEXT: xxlxor 1, 1, 1
+; CHECK-LE-NEXT: xxmrghd 34, 1, 0
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load double, ptr %p, align 8
+ %vecins.i = insertelement <2 x double> <double poison, double 0.0>, double %0, i64 0
+ ret <2 x double> %vecins.i
+}
+
+define noundef <2 x double> @vec_promote_double(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_double:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvdsx 34, 0, 3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_double:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lxvdsx 34, 0, 3
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load double, ptr %p, align 8
+ %vecins.i = insertelement <2 x double> poison, double %0, i64 0
+ ret <2 x double> %vecins.i
+}
+
+define noundef <4 x float> @vec_promote_float_zeroed(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_float_zeroed:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfs 1, 0(3)
+; CHECK-BE-NEXT: xxlxor 0, 0, 0
+; CHECK-BE-NEXT: xxspltd 2, 0, 0
+; CHECK-BE-NEXT: xxmrghd 0, 1, 0
+; CHECK-BE-NEXT: xvcvdpsp 34, 2
+; CHECK-BE-NEXT: xvcvdpsp 35, 0
+; CHECK-BE-NEXT: vmrgew 2, 3, 2
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_float_zeroed:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lfs 1, 0(3)
+; CHECK-LE-NEXT: xxlxor 0, 0, 0
+; CHECK-LE-NEXT: xxspltd 2, 0, 0
+; CHECK-LE-NEXT: xxmrghd 0, 0, 1
+; CHECK-LE-NEXT: xvcvdpsp 34, 2
+; CHECK-LE-NEXT: xvcvdpsp 35, 0
+; CHECK-LE-NEXT: vmrgew 2, 2, 3
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 8
+ %vecins.i = insertelement <4 x float> <float poison, float 0.0, float 0.0, float 0.0>, float %0, i64 0
+ ret <4 x float> %vecins.i
+}
+
+define noundef <4 x float> @vec_promote_float(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_float:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfiwzx 0, 0, 3
+; CHECK-BE-NEXT: xxspltw 34, 0, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_float:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lfiwzx 0, 0, 3
+; CHECK-LE-NEXT: xxspltw 34, 0, 1
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 8
+ %vecins.i = insertelement <4 x float> poison, float %0, i64 0
+ ret <4 x float> %vecins.i
+}
+
+define noundef <2 x i64> @vec_promote_long_long_zeroed(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_long_long_zeroed:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: ld 3, 0(3)
+; CHECK-BE-NEXT: li 4, 0
+; CHECK-BE-NEXT: mtfprd 0, 4
+; CHECK-BE-NEXT: mtfprd 1, 3
+; CHECK-BE-NEXT: xxmrghd 34, 1, 0
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_long_long_zeroed:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: ld 3, 0(3)
+; CHECK-LE-NEXT: li 4, 0
+; CHECK-LE-NEXT: mtfprd 0, 4
+; CHECK-LE-NEXT: mtfprd 1, 3
+; CHECK-LE-NEXT: xxmrghd 34, 0, 1
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i64, ptr %p, align 8
+ %vecins.i = insertelement <2 x i64> <i64 poison, i64 0>, i64 %0, i64 0
+ ret <2 x i64> %vecins.i
+}
+
+define noundef <2 x i64> @vec_promote_long_long(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_long_long:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvdsx 34, 0, 3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_long_long:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lxvdsx 34, 0, 3
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i64, ptr %p, align 8
+ %vecins.i = insertelement <2 x i64> poison, i64 %0, i64 0
+ ret <2 x i64> %vecins.i
+}
+
+define noundef <4 x i32> @vec_promote_int_zeroed(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_int_zeroed:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lwz 3, 0(3)
+; CHECK-BE-NEXT: li 4, 0
+; CHECK-BE-NEXT: li 5, 0
+; CHECK-BE-NEXT: rldimi 5, 5, 32, 0
+; CHECK-BE-NEXT: rldimi 4, 3, 32, 0
+; CHECK-BE-NEXT: mtfprd 1, 5
+; CHECK-BE-NEXT: mtfprd 0, 4
+; CHECK-BE-NEXT: xxmrghd 34, 0, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_int_zeroed:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lwz 3, 0(3)
+; CHECK-LE-NEXT: li 4, 0
+; CHECK-LE-NEXT: rldimi 3, 4, 32, 0
+; CHECK-LE-NEXT: rldimi 4, 4, 32, 0
+; CHECK-LE-NEXT: mtfprd 0, 3
+; CHECK-LE-NEXT: mtfprd 1, 4
+; CHECK-LE-NEXT: xxmrghd 34, 1, 0
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i32, ptr %p, align 4
+ %vecins.i = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %0, i64 0
+ ret <4 x i32> %vecins.i
+}
+
+define noundef <4 x i32> @vec_promote_int(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_int:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfiwzx 0, 0, 3
+; CHECK-BE-NEXT: xxspltw 34, 0, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_int:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lfiwzx 0, 0, 3
+; CHECK-LE-NEXT: xxspltw 34, 0, 1
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i32, ptr %p, align 4
+ %vecins.i = insertelement <4 x i32> poison, i32 %0, i64 0
+ ret <4 x i32> %vecins.i
+}
+
+define noundef <8 x i16> @vec_promote_short_zeroed(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_short_zeroed:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
+; CHECK-BE-NEXT: lhz 3, 0(3)
+; CHECK-BE-NEXT: li 5, 0
+; CHECK-BE-NEXT: addi 4, 4, .LCPI8_0@toc@l
+; CHECK-BE-NEXT: mtvsrwz 35, 5
+; CHECK-BE-NEXT: lxvw4x 34, 0, 4
+; CHECK-BE-NEXT: mtvsrwz 36, 3
+; CHECK-BE-NEXT: vperm 2, 4, 3, 2
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_short_zeroed:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
+; CHECK-LE-NEXT: lhz 3, 0(3)
+; CHECK-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
+; CHECK-LE-NEXT: lxvd2x 0, 0, 4
+; CHECK-LE-NEXT: li 4, 0
+; CHECK-LE-NEXT: mtvsrd 36, 3
+; CHECK-LE-NEXT: mtvsrd 34, 4
+; CHECK-LE-NEXT: xxswapd 35, 0
+; CHECK-LE-NEXT: vperm 2, 2, 4, 3
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i16, ptr %p, align 2
+ %vecins.i = insertelement <8 x i16> <i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %0, i64 0
+ ret <8 x i16> %vecins.i
+}
+
+define noundef <8 x i16> @vec_promote_short(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_short:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lhzx 3, 0, 3
+; CHECK-BE-NEXT: mtvsrwz 34, 3
+; CHECK-BE-NEXT: vsplth 2, 2, 3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_short:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lhzx 3, 0, 3
+; CHECK-LE-NEXT: mtvsrwz 34, 3
+; CHECK-LE-NEXT: vsplth 2, 2, 3
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i16, ptr %p, align 2
+ %vecins.i = insertelement <8 x i16> poison, i16 %0, i64 0
+ ret <8 x i16> %vecins.i
+}
+
+define noundef <16 x i8> @vec_promote_char_zeroed(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_char_zeroed:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
+; CHECK-BE-NEXT: lbz 3, 0(3)
+; CHECK-BE-NEXT: li 5, 0
+; CHECK-BE-NEXT: addi 4, 4, .LCPI10_0@toc@l
+; CHECK-BE-NEXT: mtvsrwz 35, 5
+; CHECK-BE-NEXT: lxvw4x 34, 0, 4
+; CHECK-BE-NEXT: mtvsrwz 36, 3
+; CHECK-BE-NEXT: vperm 2, 4, 3, 2
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_char_zeroed:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
+; CHECK-LE-NEXT: lbz 3, 0(3)
+; CHECK-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
+; CHECK-LE-NEXT: lxvd2x 0, 0, 4
+; CHECK-LE-NEXT: li 4, 0
+; CHECK-LE-NEXT: mtvsrd 36, 3
+; CHECK-LE-NEXT: mtvsrd 34, 4
+; CHECK-LE-NEXT: xxswapd 35, 0
+; CHECK-LE-NEXT: vperm 2, 2, 4, 3
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i8, ptr %p, align 1
+ %vecins.i = insertelement <16 x i8> <i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %0, i64 0
+ ret <16 x i8> %vecins.i
+}
+
+define noundef <16 x i8> @vec_promote_char(ptr nocapture noundef readonly %p) {
+; CHECK-BE-LABEL: vec_promote_char:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lbzx 3, 0, 3
+; CHECK-BE-NEXT: mtvsrwz 34, 3
+; CHECK-BE-NEXT: vspltb 2, 2, 7
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: vec_promote_char:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lbzx 3, 0, 3
+; CHECK-LE-NEXT: mtvsrwz 34, 3
+; CHECK-LE-NEXT: vspltb 2, 2, 7
+; CHECK-LE-NEXT: blr
+entry:
+ %0 = load i8, ptr %p, align 1
+ %vecins.i = insertelement <16 x i8> poison, i8 %0, i64 0
+ ret <16 x i8> %vecins.i
+}
More information about the llvm-commits
mailing list