[llvm-branch-commits] [llvm] 86b0c6e - [SelectionDAG] Correctly reduce BV to shuffle with zero on big endian
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Apr 4 11:33:39 PDT 2023
Author: Nemanja Ivanovic
Date: 2023-04-04T11:32:51-07:00
New Revision: 86b0c6e4050261823f07a3fbfd272bea0cda778e
URL: https://github.com/llvm/llvm-project/commit/86b0c6e4050261823f07a3fbfd272bea0cda778e
DIFF: https://github.com/llvm/llvm-project/commit/86b0c6e4050261823f07a3fbfd272bea0cda778e.diff
LOG: [SelectionDAG] Correctly reduce BV to shuffle with zero on big endian
This DAG combine is correct on little endian targets but
is incorrect on big endian targets.
Add big endian code to correct it.
Differential revision: https://reviews.llvm.org/D146460
Added:
llvm/test/CodeGen/PowerPC/pr61315.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eed3d820c1204..d9cde609e5992 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21361,10 +21361,9 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the source vector. The high bits map to zero. We will use a zero vector
// as the 2nd source operand of the shuffle, so use the 1st element of
// that vector (mask value is number-of-elements) for the high bits.
- if (i % ZextRatio == 0)
- ShufMask[i] = Extract.getConstantOperandVal(1);
- else
- ShufMask[i] = NumMaskElts;
+ int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
+ ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
+ : NumMaskElts;
}
// Undef elements of the build vector remain undef because we initialize
diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
new file mode 100644
index 0000000000000..de65945f963ad
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -0,0 +1,133 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NEXT: xxlxor v4, v4, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: addis r3, r2, .LCPI0_1@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI0_1@toc@l
+; CHECK-NEXT: xxperm v4, v2, vs0
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxland v2, v4, vs0
+; CHECK-NEXT: vcmpequb v2, v2, v3
+; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: blr
+ %a4 = extractelement <16 x i8> %0, i64 7
+ %a5 = zext i8 %a4 to i16
+ %a6 = insertelement <8 x i16> poison, i16 %a5, i64 0
+ %a7 = bitcast <8 x i16> %a6 to <16 x i8>
+ %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a11 = sext <16 x i1> %a10 to <16 x i8>
+ ret <16 x i8> %a11
+}
+
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI1_0:
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NEXT: xxlxor v4, v4, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l
+; CHECK-NEXT: xxperm v4, v2, vs0
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxland v2, v4, vs0
+; CHECK-NEXT: vcmpequb v2, v2, v3
+; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: blr
+ %a4 = extractelement <16 x i8> %0, i64 7
+ %a5 = zext i8 %a4 to i32
+ %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
+ %a7 = bitcast <4 x i32> %a6 to <16 x i8>
+ %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a11 = sext <16 x i1> %a10 to <16 x i8>
+ ret <16 x i8> %a11
+}
+
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI2_0:
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NEXT: xxlxor v4, v4, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: addis r3, r2, .LCPI2_1@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI2_1@toc@l
+; CHECK-NEXT: xxperm v4, v2, vs0
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxland v2, v4, vs0
+; CHECK-NEXT: vcmpequb v2, v2, v3
+; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: blr
+ %a4 = extractelement <8 x i16> %0, i64 3
+ %a5 = zext i16 %a4 to i32
+ %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
+ %a7 = bitcast <4 x i32> %a6 to <16 x i8>
+ %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a11 = sext <16 x i1> %a10 to <16 x i8>
+ ret <16 x i8> %a11
+}
More information about the llvm-branch-commits
mailing list