[llvm] [DAG] Fold freeze(shuffle(x,y,m)) -> shuffle(freeze(x),freeze(y),m) (PR #90952)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 3 02:27:40 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
If the shuffle mask contains no undef elements, then we can move the freeze through a shuffle node.
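This requires special-case handling in visitFREEZE: the replacement node has to be created with getVectorShuffle so that the original shuffle mask is carried over to the new ShuffleVectorSDNode.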
---
Patch is 64.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90952.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+38-16)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+9)
- (modified) llvm/test/CodeGen/SystemZ/pr60413.ll (+86-90)
- (modified) llvm/test/CodeGen/X86/freeze-binary.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/setcc-non-simple-type.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll (+214-520)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c0bbea16a64262..d8c2059fdff47e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15458,6 +15458,23 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0->getNumValues() != 1 || !N0->hasOneUse())
return SDValue();
+ auto FreezeOperand = [&](SDValue MaybePoisonOperand) {
+ // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
+ if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
+ return;
+ // First, freeze each offending operand.
+ SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
+ // Then, change all other uses of unfrozen operand to use frozen operand.
+ DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
+ if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
+ FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
+ // But, that also updated the use in the freeze we just created, thus
+ // creating a cycle in a DAG. Let's undo that by mutating the freeze.
+ DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
+ MaybePoisonOperand);
+ }
+ };
+
bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
N0.getOpcode() == ISD::BUILD_PAIR ||
N0.getOpcode() == ISD::CONCAT_VECTORS;
@@ -15482,6 +15499,24 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
}
}
+ // Special case handling for ShuffleVectorSDNode nodes.
+ // Fold freeze(shuffle(x,y,m)) -> shuffle(freeze(x),freeze(y),m)
+ // iff shuffle mask doesn't contain any undef elements.
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
+ FreezeOperand(SVN->getOperand(0));
+ FreezeOperand(SVN->getOperand(1));
+ // This node has been merged with another.
+ if (N->getOpcode() == ISD::DELETED_NODE)
+ return SDValue(N, 0);
+ // NOTE: Get the operands again in case they've been updated.
+ SDValue Op0 = SVN->getOperand(0);
+ SDValue Op1 = SVN->getOperand(1);
+ Op0 = Op0.isUndef() ? DAG.getFreeze(Op0) : Op0;
+ Op1 = Op1.isUndef() ? DAG.getFreeze(Op1) : Op1;
+ return DAG.getVectorShuffle(N0.getValueType(), SDLoc(N), Op0, Op1,
+ SVN->getMask());
+ }
+
SmallSetVector<SDValue, 8> MaybePoisonOperands;
for (SDValue Op : N0->ops()) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
@@ -15500,22 +15535,9 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
// it could create undef or poison due to it's poison-generating flags.
// So not finding any maybe-poison operands is fine.
- for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
- // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
- if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
- continue;
- // First, freeze each offending operand.
- SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
- // Then, change all other uses of unfrozen operand to use frozen operand.
- DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
- if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
- FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
- // But, that also updated the use in the freeze we just created, thus
- // creating a cycle in a DAG. Let's undo that by mutating the freeze.
- DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
- MaybePoisonOperand);
- }
- }
+ // Freeze all uses of the different operands.
+ for (SDValue MaybePoisonOperand : MaybePoisonOperands)
+ FreezeOperand(MaybePoisonOperand);
// This node has been merged with another.
if (N->getOpcode() == ISD::DELETED_NODE)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 25a728bf9ba3e7..fef4b7dbf558ec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5225,6 +5225,15 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
}
+ case ISD::VECTOR_SHUFFLE: {
+ // Check for any demanded shuffle element that is undef.
+ auto *SVN = cast<ShuffleVectorSDNode>(Op);
+ for (auto [Idx, Elt] : enumerate(SVN->getMask()))
+ if (Elt < 0 && DemandedElts[Idx])
+ return true;
+ return false;
+ }
+
default:
// Allow the target to implement this method for its nodes.
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
diff --git a/llvm/test/CodeGen/SystemZ/pr60413.ll b/llvm/test/CodeGen/SystemZ/pr60413.ll
index 5a629567d07069..8a6a30318ae583 100644
--- a/llvm/test/CodeGen/SystemZ/pr60413.ll
+++ b/llvm/test/CodeGen/SystemZ/pr60413.ll
@@ -13,114 +13,110 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0
define dso_local void @m() local_unnamed_addr #1 {
; CHECK-LABEL: m:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: aghi %r15, -168
-; CHECK-NEXT: llhrl %r2, f+4
-; CHECK-NEXT: sll %r2, 8
-; CHECK-NEXT: larl %r1, f
-; CHECK-NEXT: ic %r2, 6(%r1)
-; CHECK-NEXT: larl %r1, e
-; CHECK-NEXT: lb %r0, 3(%r1)
-; CHECK-NEXT: clfi %r2, 128
+; CHECK-NEXT: lhrl %r1, f+4
+; CHECK-NEXT: sll %r1, 8
+; CHECK-NEXT: larl %r2, f
+; CHECK-NEXT: ic %r1, 6(%r2)
+; CHECK-NEXT: larl %r2, e
+; CHECK-NEXT: lb %r0, 3(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r1
+; CHECK-NEXT: vlvgp %v1, %r1, %r0
+; CHECK-NEXT: vlvgf %v1, %r1, 0
+; CHECK-NEXT: vlvgf %v1, %r1, 2
+; CHECK-NEXT: vlvgp %v2, %r1, %r1
+; CHECK-NEXT: # kill: def $r1l killed $r1l killed $r1d
+; CHECK-NEXT: nilh %r1, 255
+; CHECK-NEXT: chi %r1, 128
; CHECK-NEXT: ipm %r1
; CHECK-NEXT: risbg %r1, %r1, 63, 191, 36
-; CHECK-NEXT: vlvgp %v1, %r2, %r0
-; CHECK-NEXT: vlvgf %v1, %r2, 0
-; CHECK-NEXT: vlvgf %v1, %r2, 2
-; CHECK-NEXT: vlvgp %v0, %r0, %r2
-; CHECK-NEXT: vlvgp %v2, %r2, %r2
-; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
-; CHECK-NEXT: nilh %r2, 255
-; CHECK-NEXT: chi %r2, 128
-; CHECK-NEXT: ipm %r2
-; CHECK-NEXT: risbg %r2, %r2, 63, 191, 36
; CHECK-NEXT: vlvgf %v0, %r0, 0
; CHECK-NEXT: vlvgf %v0, %r0, 2
-; CHECK-NEXT: vrepf %v2, %v2, 1
; CHECK-NEXT: vgbm %v3, 30583
; CHECK-NEXT: vn %v0, %v0, %v3
; CHECK-NEXT: vn %v1, %v1, %v3
+; CHECK-NEXT: vrepf %v2, %v2, 1
; CHECK-NEXT: vn %v2, %v2, %v3
; CHECK-NEXT: vrepif %v3, 127
; CHECK-NEXT: vchlf %v1, %v1, %v3
-; CHECK-NEXT: vlgvf %r12, %v1, 0
+; CHECK-NEXT: vlgvf %r13, %v1, 0
; CHECK-NEXT: vchlf %v2, %v2, %v3
-; CHECK-NEXT: vlgvf %r4, %v2, 1
-; CHECK-NEXT: nilf %r4, 1
-; CHECK-NEXT: vlgvf %r5, %v2, 0
-; CHECK-NEXT: risbg %r3, %r5, 48, 176, 15
-; CHECK-NEXT: rosbg %r3, %r4, 32, 49, 14
-; CHECK-NEXT: vlgvf %r14, %v2, 2
+; CHECK-NEXT: vlgvf %r3, %v2, 1
+; CHECK-NEXT: nilf %r3, 1
+; CHECK-NEXT: vlgvf %r4, %v2, 0
+; CHECK-NEXT: risbg %r2, %r4, 48, 176, 15
+; CHECK-NEXT: rosbg %r2, %r3, 32, 49, 14
+; CHECK-NEXT: vlgvf %r5, %v2, 2
+; CHECK-NEXT: nilf %r5, 1
+; CHECK-NEXT: rosbg %r2, %r5, 32, 50, 13
+; CHECK-NEXT: vlgvf %r14, %v2, 3
; CHECK-NEXT: nilf %r14, 1
-; CHECK-NEXT: rosbg %r3, %r14, 32, 50, 13
-; CHECK-NEXT: vlgvf %r13, %v2, 3
-; CHECK-NEXT: nilf %r13, 1
-; CHECK-NEXT: rosbg %r3, %r13, 32, 51, 12
-; CHECK-NEXT: rosbg %r3, %r12, 52, 52, 11
-; CHECK-NEXT: vlgvf %r12, %v1, 1
-; CHECK-NEXT: rosbg %r3, %r12, 53, 53, 10
-; CHECK-NEXT: vlgvf %r12, %v1, 2
-; CHECK-NEXT: rosbg %r3, %r12, 54, 54, 9
-; CHECK-NEXT: vlgvf %r12, %v1, 3
-; CHECK-NEXT: rosbg %r3, %r12, 55, 55, 8
+; CHECK-NEXT: rosbg %r2, %r14, 32, 51, 12
+; CHECK-NEXT: rosbg %r2, %r13, 52, 52, 11
+; CHECK-NEXT: vlgvf %r13, %v1, 1
+; CHECK-NEXT: rosbg %r2, %r13, 53, 53, 10
+; CHECK-NEXT: vlgvf %r13, %v1, 2
+; CHECK-NEXT: rosbg %r2, %r13, 54, 54, 9
+; CHECK-NEXT: vlgvf %r13, %v1, 3
+; CHECK-NEXT: rosbg %r2, %r13, 55, 55, 8
; CHECK-NEXT: vchlf %v0, %v0, %v3
-; CHECK-NEXT: vlgvf %r12, %v0, 0
-; CHECK-NEXT: rosbg %r3, %r12, 56, 56, 7
-; CHECK-NEXT: vlgvf %r12, %v0, 1
-; CHECK-NEXT: rosbg %r3, %r12, 57, 57, 6
-; CHECK-NEXT: vlgvf %r12, %v0, 2
-; CHECK-NEXT: rosbg %r3, %r12, 58, 58, 5
-; CHECK-NEXT: vlgvf %r12, %v0, 3
-; CHECK-NEXT: rosbg %r3, %r12, 59, 59, 4
-; CHECK-NEXT: nilf %r5, 1
-; CHECK-NEXT: rosbg %r3, %r5, 32, 60, 3
-; CHECK-NEXT: rosbg %r3, %r4, 32, 61, 2
-; CHECK-NEXT: rosbg %r3, %r14, 32, 62, 1
-; CHECK-NEXT: or %r3, %r13
-; CHECK-NEXT: vlgvb %r5, %v0, 1
-; CHECK-NEXT: vlgvb %r4, %v0, 0
-; CHECK-NEXT: risbg %r4, %r4, 48, 176, 15
-; CHECK-NEXT: rosbg %r4, %r5, 49, 49, 14
-; CHECK-NEXT: vlgvb %r5, %v0, 2
-; CHECK-NEXT: rosbg %r4, %r5, 50, 50, 13
-; CHECK-NEXT: vlgvb %r5, %v0, 3
-; CHECK-NEXT: rosbg %r4, %r5, 51, 51, 12
-; CHECK-NEXT: vlgvb %r5, %v0, 4
-; CHECK-NEXT: rosbg %r4, %r5, 52, 52, 11
-; CHECK-NEXT: vlgvb %r5, %v0, 5
-; CHECK-NEXT: rosbg %r4, %r5, 53, 53, 10
-; CHECK-NEXT: vlgvb %r5, %v0, 6
-; CHECK-NEXT: rosbg %r4, %r5, 54, 54, 9
-; CHECK-NEXT: vlgvb %r5, %v0, 7
-; CHECK-NEXT: rosbg %r4, %r5, 55, 55, 8
-; CHECK-NEXT: vlgvb %r5, %v0, 8
-; CHECK-NEXT: rosbg %r4, %r5, 56, 56, 7
-; CHECK-NEXT: vlgvb %r5, %v0, 9
-; CHECK-NEXT: rosbg %r4, %r5, 57, 57, 6
-; CHECK-NEXT: vlgvb %r5, %v0, 10
-; CHECK-NEXT: rosbg %r4, %r5, 58, 58, 5
-; CHECK-NEXT: vlgvb %r5, %v0, 11
-; CHECK-NEXT: rosbg %r4, %r5, 59, 59, 4
-; CHECK-NEXT: vlgvb %r5, %v0, 12
-; CHECK-NEXT: rosbg %r4, %r5, 60, 60, 3
-; CHECK-NEXT: vlgvb %r5, %v0, 13
-; CHECK-NEXT: rosbg %r4, %r5, 61, 61, 2
-; CHECK-NEXT: vlgvb %r5, %v0, 14
-; CHECK-NEXT: rosbg %r4, %r5, 62, 62, 1
-; CHECK-NEXT: vlgvb %r5, %v0, 15
-; CHECK-NEXT: rosbg %r4, %r5, 63, 63, 0
-; CHECK-NEXT: xilf %r4, 4294967295
-; CHECK-NEXT: or %r4, %r3
-; CHECK-NEXT: tmll %r4, 65535
-; CHECK-NEXT: ipm %r3
-; CHECK-NEXT: afi %r3, -268435456
-; CHECK-NEXT: srl %r3, 31
+; CHECK-NEXT: vlgvf %r13, %v0, 0
+; CHECK-NEXT: rosbg %r2, %r13, 56, 56, 7
+; CHECK-NEXT: vlgvf %r13, %v0, 1
+; CHECK-NEXT: rosbg %r2, %r13, 57, 57, 6
+; CHECK-NEXT: vlgvf %r13, %v0, 2
+; CHECK-NEXT: rosbg %r2, %r13, 58, 58, 5
+; CHECK-NEXT: vlgvf %r13, %v0, 3
+; CHECK-NEXT: rosbg %r2, %r13, 59, 59, 4
+; CHECK-NEXT: nilf %r4, 1
+; CHECK-NEXT: rosbg %r2, %r4, 32, 60, 3
+; CHECK-NEXT: rosbg %r2, %r3, 32, 61, 2
+; CHECK-NEXT: rosbg %r2, %r5, 32, 62, 1
+; CHECK-NEXT: or %r2, %r14
+; CHECK-NEXT: vlgvb %r4, %v0, 1
+; CHECK-NEXT: vlgvb %r3, %v0, 0
+; CHECK-NEXT: risbg %r3, %r3, 48, 176, 15
+; CHECK-NEXT: rosbg %r3, %r4, 49, 49, 14
+; CHECK-NEXT: vlgvb %r4, %v0, 2
+; CHECK-NEXT: rosbg %r3, %r4, 50, 50, 13
+; CHECK-NEXT: vlgvb %r4, %v0, 3
+; CHECK-NEXT: rosbg %r3, %r4, 51, 51, 12
+; CHECK-NEXT: vlgvb %r4, %v0, 4
+; CHECK-NEXT: rosbg %r3, %r4, 52, 52, 11
+; CHECK-NEXT: vlgvb %r4, %v0, 5
+; CHECK-NEXT: rosbg %r3, %r4, 53, 53, 10
+; CHECK-NEXT: vlgvb %r4, %v0, 6
+; CHECK-NEXT: rosbg %r3, %r4, 54, 54, 9
+; CHECK-NEXT: vlgvb %r4, %v0, 7
+; CHECK-NEXT: rosbg %r3, %r4, 55, 55, 8
+; CHECK-NEXT: vlgvb %r4, %v0, 8
+; CHECK-NEXT: rosbg %r3, %r4, 56, 56, 7
+; CHECK-NEXT: vlgvb %r4, %v0, 9
+; CHECK-NEXT: rosbg %r3, %r4, 57, 57, 6
+; CHECK-NEXT: vlgvb %r4, %v0, 10
+; CHECK-NEXT: rosbg %r3, %r4, 58, 58, 5
+; CHECK-NEXT: vlgvb %r4, %v0, 11
+; CHECK-NEXT: rosbg %r3, %r4, 59, 59, 4
+; CHECK-NEXT: vlgvb %r4, %v0, 12
+; CHECK-NEXT: rosbg %r3, %r4, 60, 60, 3
+; CHECK-NEXT: vlgvb %r4, %v0, 13
+; CHECK-NEXT: rosbg %r3, %r4, 61, 61, 2
+; CHECK-NEXT: vlgvb %r4, %v0, 14
+; CHECK-NEXT: rosbg %r3, %r4, 62, 62, 1
+; CHECK-NEXT: vlgvb %r4, %v0, 15
+; CHECK-NEXT: rosbg %r3, %r4, 63, 63, 0
+; CHECK-NEXT: xilf %r3, 4294967295
+; CHECK-NEXT: or %r3, %r2
+; CHECK-NEXT: tmll %r3, 65535
+; CHECK-NEXT: ipm %r2
+; CHECK-NEXT: afi %r2, -268435456
+; CHECK-NEXT: srl %r2, 31
; CHECK-NEXT: nr %r2, %r1
-; CHECK-NEXT: nr %r2, %r3
; CHECK-NEXT: nr %r2, %r0
; CHECK-NEXT: larl %r1, g
; CHECK-NEXT: stc %r2, 0(%r1)
-; CHECK-NEXT: lmg %r12, %r15, 264(%r15)
+; CHECK-NEXT: lmg %r13, %r15, 272(%r15)
; CHECK-NEXT: br %r14
entry:
%n = alloca i32, align 4
diff --git a/llvm/test/CodeGen/X86/freeze-binary.ll b/llvm/test/CodeGen/X86/freeze-binary.ll
index c79da37988e40b..dbc027495297b1 100644
--- a/llvm/test/CodeGen/X86/freeze-binary.ll
+++ b/llvm/test/CodeGen/X86/freeze-binary.ll
@@ -546,8 +546,8 @@ define <8 x i16> @freeze_ashr_vec(<8 x i16> %a0) nounwind {
define <4 x i32> @freeze_ashr_vec_outofrange(<4 x i32> %a0) nounwind {
; X86-LABEL: freeze_ashr_vec_outofrange:
; X86: # %bb.0:
-; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; X86-NEXT: psrad $1, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; X86-NEXT: psrad $2, %xmm0
; X86-NEXT: retl
;
@@ -660,8 +660,8 @@ define <8 x i16> @freeze_lshr_vec(<8 x i16> %a0) nounwind {
define <4 x i32> @freeze_lshr_vec_outofrange(<4 x i32> %a0) nounwind {
; X86-LABEL: freeze_lshr_vec_outofrange:
; X86: # %bb.0:
-; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; X86-NEXT: psrld $1, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; X86-NEXT: psrld $2, %xmm0
; X86-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
index a80d8d8cd01b85..76cf2423a254fe 100644
--- a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
+++ b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
@@ -121,12 +121,10 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-AVX2-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-AVX2-NEXT: vmovdqu 1024(%rdx,%rsi), %xmm5
; CHECK-AVX2-NEXT: vmovdqu 1040(%rdx,%rsi), %xmm6
-; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm5[0],xmm6[0]
-; CHECK-AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
-; CHECK-AVX2-NEXT: vmovq %xmm5, %rdi
-; CHECK-AVX2-NEXT: vpextrq $1, %xmm5, %r8
-; CHECK-AVX2-NEXT: vmovq %xmm7, %r9
-; CHECK-AVX2-NEXT: vpextrq $1, %xmm7, %r10
+; CHECK-AVX2-NEXT: vpextrq $1, %xmm5, %rdi
+; CHECK-AVX2-NEXT: vpextrq $1, %xmm6, %r8
+; CHECK-AVX2-NEXT: vmovq %xmm5, %r9
+; CHECK-AVX2-NEXT: vmovq %xmm6, %r10
; CHECK-AVX2-NEXT: negq %r10
; CHECK-AVX2-NEXT: movq %rcx, %r10
; CHECK-AVX2-NEXT: sbbq %r8, %r10
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index f7a27a5b914466..9ae1f270e88337 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -65,7 +65,6 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax
-; X64-NO-BMI2-NEXT: movzwl %ax, %eax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrl %cl, %eax
@@ -75,7 +74,6 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
-; X64-BMI2-NEXT: movzwl %ax, %eax
; X64-BMI2-NEXT: shll $3, %esi
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: movb %al, (%rdx)
@@ -83,15 +81,14 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
;
; X86-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X86-NO-BMI2: # %bb.0:
-; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-NEXT: movzwl (%edx), %edx
-; X86-NO-BMI2-NEXT: movzwl %dx, %edx
+; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-BMI2-NEXT: movzwl (%eax), %eax
; X86-NO-BMI2-NEXT: shll $3, %ecx
; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NO-BMI2-NEXT: shrl %cl, %edx
-; X86-NO-BMI2-NEXT: movb %dl, (%eax)
+; X86-NO-BMI2-NEXT: shrl %cl, %eax
+; X86-NO-BMI2-NEXT: movb %al, (%edx)
; X86-NO-BMI2-NEXT: retl
;
; X86-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
@@ -100,7 +97,6 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movzwl (%edx), %edx
-; X86-BMI2-NEXT: movzwl %dx, %edx
; X86-BMI2-NEXT: shll $3, %ecx
; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx
; X86-BMI2-NEXT: movb %cl, (%eax)
@@ -123,7 +119,6 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax
-; X64-NO-BMI2-NEXT: movzwl %ax, %eax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrl %cl, %eax
@@ -133,7 +128,6 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
-; X64-BMI2-NEXT: movzwl %ax, %eax
; X64-BMI2-NEXT: shll $3, %esi
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: movw %ax, (%rdx)
@@ -145,7 +139,6 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NO-BMI2-NEXT: movzwl (%edx), %edx
-; X86-NO-BMI2-NEXT: movzwl %dx, %edx
; X86-NO-BMI2-NEXT: shll $3, %ecx
; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NO-BMI2-NEXT: shrl %cl, %edx
@@ -158,7 +151,6 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movzwl (%edx), %edx
-; X86-BMI2-NEXT: movzwl %dx, %edx
; X86-BMI2-NEXT: shll $3, %ecx
; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx
; X86-BMI2-NEXT: movw %cx, (%eax)
@@ -179,9 +171,8 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT: movq %xmm0, %rax
+; X64-NO-BMI2-NEXT: movl (%rdi), %eax
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movb %al, (%rdx)
@@ -189,9 +180,8 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
;
; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-BMI2-NEXT: shll $3, %esi
-; X64-BMI2-NEXT: movq %xmm0, %rax
+; X64-BMI2-NEXT: movl (%rdi), %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movb %al, (%rdx)
; X64-BMI2-NEXT: retq
@@ -199,99 +189,49 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/90952
More information about the llvm-commits mailing list