[llvm-branch-commits] [llvm] 86b0c6e - [SelectionDAG] Correctly reduce BV to shuffle with zero on big endian

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Apr 4 11:33:39 PDT 2023


Author: Nemanja Ivanovic
Date: 2023-04-04T11:32:51-07:00
New Revision: 86b0c6e4050261823f07a3fbfd272bea0cda778e

URL: https://github.com/llvm/llvm-project/commit/86b0c6e4050261823f07a3fbfd272bea0cda778e
DIFF: https://github.com/llvm/llvm-project/commit/86b0c6e4050261823f07a3fbfd272bea0cda778e.diff

LOG: [SelectionDAG] Correctly reduce BV to shuffle with zero on big endian

This DAG combine is correct on little endian targets but
is incorrect on big endian targets.
Add big endian code to correct it.

Differential revision: https://reviews.llvm.org/D146460

Added: 
    llvm/test/CodeGen/PowerPC/pr61315.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eed3d820c1204..d9cde609e5992 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21361,10 +21361,9 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
       // the source vector. The high bits map to zero. We will use a zero vector
       // as the 2nd source operand of the shuffle, so use the 1st element of
       // that vector (mask value is number-of-elements) for the high bits.
-      if (i % ZextRatio == 0)
-        ShufMask[i] = Extract.getConstantOperandVal(1);
-      else
-        ShufMask[i] = NumMaskElts;
+      int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
+      ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
+                                           : NumMaskElts;
     }
 
     // Undef elements of the build vector remain undef because we initialize

diff  --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
new file mode 100644
index 0000000000000..de65945f963ad
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -0,0 +1,133 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NEXT:    xxlxor v4, v4, v4
+; CHECK-NEXT:    xxlxor v3, v3, v3
+; CHECK-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NEXT:    lxv vs0, 0(r3)
+; CHECK-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
+; CHECK-NEXT:    addi r3, r3, .LCPI0_1@toc@l
+; CHECK-NEXT:    xxperm v4, v2, vs0
+; CHECK-NEXT:    lxv vs0, 0(r3)
+; CHECK-NEXT:    xxland v2, v4, vs0
+; CHECK-NEXT:    vcmpequb v2, v2, v3
+; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    blr
+  %a4 = extractelement <16 x i8> %0, i64 7
+  %a5 = zext i8 %a4 to i16
+  %a6 = insertelement <8 x i16> poison, i16 %a5, i64 0
+  %a7 = bitcast <8 x i16> %a6 to <16 x i8>
+  %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+  %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+  %a11 = sext <16 x i1> %a10 to <16 x i8>
+  ret <16 x i8> %a11
+}
+
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI1_0:
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	7                               # 0x7
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-NEXT: .byte	16                              # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NEXT:    xxlxor v4, v4, v4
+; CHECK-NEXT:    xxlxor v3, v3, v3
+; CHECK-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NEXT:    lxv vs0, 0(r3)
+; CHECK-NEXT:    addis r3, r2, .LCPI1_1@toc@ha
+; CHECK-NEXT:    addi r3, r3, .LCPI1_1@toc@l
+; CHECK-NEXT:    xxperm v4, v2, vs0
+; CHECK-NEXT:    lxv vs0, 0(r3)
+; CHECK-NEXT:    xxland v2, v4, vs0
+; CHECK-NEXT:    vcmpequb v2, v2, v3
+; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    blr
+  %a4 = extractelement <16 x i8> %0, i64 7
+  %a5 = zext i8 %a4 to i32
+  %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
+  %a7 = bitcast <4 x i32> %a6 to <16 x i8>
+  %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+  %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+  %a11 = sext <16 x i1> %a10 to <16 x i8>
+  ret <16 x i8> %a11
+}
+
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI2_0:
+; CHECK-NEXT: .byte	6                                # 0x6
+; CHECK-NEXT: .byte	7                                # 0x7
+; CHECK-NEXT: .byte	6                                # 0x6
+; CHECK-NEXT: .byte	7                                # 0x7
+; CHECK-NEXT: .byte	6                                # 0x6
+; CHECK-NEXT: .byte	7                                # 0x7
+; CHECK-NEXT: .byte	6                                # 0x6
+; CHECK-NEXT: .byte	7                                # 0x7
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-NEXT: .byte	16                               # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NEXT:    xxlxor v4, v4, v4
+; CHECK-NEXT:    xxlxor v3, v3, v3
+; CHECK-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NEXT:    lxv vs0, 0(r3)
+; CHECK-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
+; CHECK-NEXT:    addi r3, r3, .LCPI2_1@toc@l
+; CHECK-NEXT:    xxperm v4, v2, vs0
+; CHECK-NEXT:    lxv vs0, 0(r3)
+; CHECK-NEXT:    xxland v2, v4, vs0
+; CHECK-NEXT:    vcmpequb v2, v2, v3
+; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    blr
+  %a4 = extractelement <8 x i16> %0, i64 3
+  %a5 = zext i16 %a4 to i32
+  %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
+  %a7 = bitcast <4 x i32> %a6 to <16 x i8>
+  %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+  %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+  %a11 = sext <16 x i1> %a10 to <16 x i8>
+  ret <16 x i8> %a11
+}


        


More information about the llvm-branch-commits mailing list