[llvm] e7c35d7 - [SelectionDAG] Correctly reduce BV to shuffle with zero on big endian
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 24 07:57:25 PDT 2023
Author: Nemanja Ivanovic
Date: 2023-03-24T10:57:17-04:00
New Revision: e7c35d71007fab6e6729a0cfa821023128de2f74
URL: https://github.com/llvm/llvm-project/commit/e7c35d71007fab6e6729a0cfa821023128de2f74
DIFF: https://github.com/llvm/llvm-project/commit/e7c35d71007fab6e6729a0cfa821023128de2f74.diff
LOG: [SelectionDAG] Correctly reduce BV to shuffle with zero on big endian
This DAG combine is correct on little endian targets but
is incorrect on big endian targets.
Add big endian code to correct it.
Differential revision: https://reviews.llvm.org/D146460
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/PowerPC/pr61315.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 20d7447802c8a..31b584ee9f8dd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22010,10 +22010,9 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the source vector. The high bits map to zero. We will use a zero vector
// as the 2nd source operand of the shuffle, so use the 1st element of
// that vector (mask value is number-of-elements) for the high bits.
- if (i % ZextRatio == 0)
- ShufMask[i] = Extract.getConstantOperandVal(1);
- else
- ShufMask[i] = NumMaskElts;
+ int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
+ ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
+ : NumMaskElts;
}
// Undef elements of the build vector remain undef because we initialize
diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
index 9a763e16122cb..de65945f963ad 100644
--- a/llvm/test/CodeGen/PowerPC/pr61315.ll
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -2,14 +2,6 @@
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
; CHECK: .LCPI0_0:
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
@@ -18,6 +10,14 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
@@ -46,14 +46,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
; CHECK: .LCPI1_0:
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 16 # 0x10
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
@@ -62,6 +54,14 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
@@ -90,22 +90,22 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
; CHECK: .LCPI2_0:
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 17 # 0x11
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 17 # 0x11
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 17 # 0x11
-; CHECK-NEXT: .byte 16 # 0x10
-; CHECK-NEXT: .byte 17 # 0x11
-; CHECK-NEXT: .byte 6 # 0x6
-; CHECK-NEXT: .byte 6 # 0x6
-; CHECK-NEXT: .byte 6 # 0x6
-; CHECK-NEXT: .byte 6 # 0x6
-; CHECK-NEXT: .byte 6 # 0x6
-; CHECK-NEXT: .byte 6 # 0x6
-; CHECK-NEXT: .byte 6 # 0x6
-; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
More information about the llvm-commits
mailing list