[llvm] 263f1b2 - [PowerPC] Fix combine step for shufflevector.
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 14 09:31:29 PDT 2022
Author: Stefan Pintilie
Date: 2022-06-14T11:31:24-05:00
New Revision: 263f1b2f5df71c71397f5bb05c353f3d9d27522e
URL: https://github.com/llvm/llvm-project/commit/263f1b2f5df71c71397f5bb05c353f3d9d27522e
DIFF: https://github.com/llvm/llvm-project/commit/263f1b2f5df71c71397f5bb05c353f3d9d27522e.diff
LOG: [PowerPC] Fix combine step for shufflevector.
The combine step for shufflevector will sometimes replace undef in the mask
with a defined value. This can cause an infinite loop in some cases as another
combine will then put the undef back in the mask.
This patch fixes the issue so that undefs are not replaced when doing a combine.
Reviewed By: ZarkoCA, amyk, quinnp, saghir
Differential Revision: https://reviews.llvm.org/D127439
Added:
llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 636c1b4390e9f..86258a6ad2eeb 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14972,24 +14972,36 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
// Example (even elements from first vector):
// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
if (Mask[0] < NumElts)
- for (int i = 1, e = Mask.size(); i < e; i += 2)
+ for (int i = 1, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = (ShuffV[i - 1] + NumElts);
+ }
// Example (odd elements from first vector):
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
else
- for (int i = 0, e = Mask.size(); i < e; i += 2)
+ for (int i = 0, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = (ShuffV[i + 1] + NumElts);
+ }
} else {
// Example (even elements from first vector):
// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
if (Mask[0] < NumElts)
- for (int i = 0, e = Mask.size(); i < e; i += 2)
+ for (int i = 0, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = ShuffV[i + 1] - NumElts;
+ }
// Example (odd elements from first vector):
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
else
- for (int i = 1, e = Mask.size(); i < e; i += 2)
+ for (int i = 1, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = ShuffV[i - 1] - NumElts;
+ }
}
// If the RHS has undefs, we need to remove them since we may have created
diff --git a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
new file mode 100644
index 0000000000000..544683d7aa229
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefix=AIX
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefix=AIX-32
+; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
+; RUN: | FileCheck %s --check-prefix=LE
+; RUN: llc -verify-machineinstrs -mtriple powerpcle-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
+; RUN: | FileCheck %s --check-prefix=LE-32
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
+; RUN: | FileCheck %s --check-prefix=BE
+; RUN: llc -verify-machineinstrs -mtriple powerpc-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
+; RUN: | FileCheck %s --check-prefix=BE-32
+
+
+; Function Attrs: nounwind
+define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
+; AIX-LABEL: shufflevector_combine:
+; AIX: # %bb.0: # %newFuncRoot
+; AIX-NEXT: ld 3, L..C0(2) # %const.0
+; AIX-NEXT: xxlxor 36, 36, 36
+; AIX-NEXT: lxv 35, 0(3)
+; AIX-NEXT: li 3, 0
+; AIX-NEXT: vperm 2, 4, 2, 3
+; AIX-NEXT: vinsw 2, 3, 8
+; AIX-NEXT: vpkuwum 2, 2, 2
+; AIX-NEXT: blr
+;
+; AIX-32-LABEL: shufflevector_combine:
+; AIX-32: # %bb.0: # %newFuncRoot
+; AIX-32-NEXT: lwz 3, L..C0(2) # %const.0
+; AIX-32-NEXT: xxlxor 36, 36, 36
+; AIX-32-NEXT: lxv 35, 0(3)
+; AIX-32-NEXT: li 3, 0
+; AIX-32-NEXT: vperm 2, 4, 2, 3
+; AIX-32-NEXT: vinsw 2, 3, 8
+; AIX-32-NEXT: vpkuwum 2, 2, 2
+; AIX-32-NEXT: blr
+;
+; LE-LABEL: shufflevector_combine:
+; LE: # %bb.0: # %newFuncRoot
+; LE-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1
+; LE-NEXT: xxlxor v4, v4, v4
+; LE-NEXT: li r3, 0
+; LE-NEXT: vperm v2, v2, v4, v3
+; LE-NEXT: vinsw v2, r3, 4
+; LE-NEXT: vpkuwum v2, v2, v2
+; LE-NEXT: blr
+;
+; LE-32-LABEL: shufflevector_combine:
+; LE-32: # %bb.0: # %newFuncRoot
+; LE-32-NEXT: li r3, .LCPI0_0@l
+; LE-32-NEXT: lis r4, .LCPI0_0@ha
+; LE-32-NEXT: xxlxor v4, v4, v4
+; LE-32-NEXT: lxvx v3, r4, r3
+; LE-32-NEXT: li r3, 0
+; LE-32-NEXT: vperm v2, v2, v4, v3
+; LE-32-NEXT: vinsw v2, r3, 4
+; LE-32-NEXT: vpkuwum v2, v2, v2
+; LE-32-NEXT: blr
+;
+; BE-LABEL: shufflevector_combine:
+; BE: # %bb.0: # %newFuncRoot
+; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; BE-NEXT: xxlxor v4, v4, v4
+; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; BE-NEXT: lxv v3, 0(r3)
+; BE-NEXT: li r3, 0
+; BE-NEXT: vperm v2, v4, v2, v3
+; BE-NEXT: vinsw v2, r3, 8
+; BE-NEXT: vpkuwum v2, v2, v2
+; BE-NEXT: blr
+;
+; BE-32-LABEL: shufflevector_combine:
+; BE-32: # %bb.0: # %newFuncRoot
+; BE-32-NEXT: li r3, .LCPI0_0@l
+; BE-32-NEXT: lis r4, .LCPI0_0@ha
+; BE-32-NEXT: xxlxor v4, v4, v4
+; BE-32-NEXT: lxvx v3, r4, r3
+; BE-32-NEXT: li r3, 0
+; BE-32-NEXT: vperm v2, v4, v2, v3
+; BE-32-NEXT: vinsw v2, r3, 8
+; BE-32-NEXT: vpkuwum v2, v2, v2
+; BE-32-NEXT: blr
+newFuncRoot:
+ %1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %0, <4 x i32> <i32 0, i32 7, i32 undef, i32 6>
+ %2 = insertelement <4 x i32> %1, i32 0, i64 2
+ %3 = trunc <4 x i32> %2 to <4 x i16>
+ ret <4 x i16> %3
+}
+
+attributes #0 = { nounwind }
More information about the llvm-commits
mailing list