[llvm] c8dc6b5 - [SDAG] Improve `SimplifyDemandedBits` for mul (#90034)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 07:43:13 PDT 2024
Author: Yingwei Zheng
Date: 2024-05-22T22:43:10+08:00
New Revision: c8dc6b59d68635f73d2970b7fc8bc9c6c2684098
URL: https://github.com/llvm/llvm-project/commit/c8dc6b59d68635f73d2970b7fc8bc9c6c2684098
DIFF: https://github.com/llvm/llvm-project/commit/c8dc6b59d68635f73d2970b7fc8bc9c6c2684098.diff
LOG: [SDAG] Improve `SimplifyDemandedBits` for mul (#90034)
If the RHS of the multiply is a constant with X trailing zero bits, then the
X most significant bits of the LHS are not demanded.
Alive2: https://alive2.llvm.org/ce/z/F5CyJW
Fixes https://github.com/llvm/llvm-project/issues/56645.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/neon-dotreduce.ll
llvm/test/CodeGen/RISCV/mul.ll
llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
llvm/test/CodeGen/RISCV/rv64zba.ll
llvm/test/CodeGen/RISCV/sextw-removal.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
llvm/test/CodeGen/X86/combine-srem.ll
llvm/test/CodeGen/X86/pmul.ll
llvm/test/CodeGen/X86/shrink_vmul.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 87c4c62522c1b..85bd45a88542b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2814,10 +2814,16 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned DemandedBitsLZ = DemandedBits.countl_zero();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
KnownBits KnownOp0, KnownOp1;
- if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
- Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
+ auto GetDemandedBitsLHSMask = [&](APInt Demanded,
+ const KnownBits &KnownRHS) {
+ if (Op.getOpcode() == ISD::MUL)
+ Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
+ return Demanded;
+ };
+ if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
Depth + 1) ||
+ SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
+ DemandedElts, KnownOp0, TLO, Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index 736f66c935e74..40b8a47f92aa7 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -1709,289 +1709,289 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: fmov s4, w0
; CHECK-NEXT: ldr b0, [sp, #80]
; CHECK-NEXT: add x8, sp, #88
-; CHECK-NEXT: ldr b2, [sp, #144]
-; CHECK-NEXT: fmov s4, w0
+; CHECK-NEXT: ldr b1, [sp, #144]
; CHECK-NEXT: add x10, sp, #152
-; CHECK-NEXT: ldr b3, [sp, #16]
+; CHECK-NEXT: ldr b6, [sp, #16]
; CHECK-NEXT: ld1 { v0.b }[1], [x8]
-; CHECK-NEXT: ld1 { v2.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #24
-; CHECK-NEXT: ldr b1, [sp, #344]
; CHECK-NEXT: add x9, sp, #96
-; CHECK-NEXT: ld1 { v3.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #352
+; CHECK-NEXT: ldr b2, [sp, #344]
; CHECK-NEXT: mov v4.b[1], w1
+; CHECK-NEXT: ld1 { v1.b }[1], [x10]
+; CHECK-NEXT: add x10, sp, #24
+; CHECK-NEXT: ld1 { v6.b }[1], [x10]
+; CHECK-NEXT: add x10, sp, #352
; CHECK-NEXT: add x8, sp, #104
; CHECK-NEXT: ld1 { v0.b }[2], [x9]
; CHECK-NEXT: add x9, sp, #160
-; CHECK-NEXT: ld1 { v1.b }[1], [x10]
-; CHECK-NEXT: ld1 { v2.b }[2], [x9]
-; CHECK-NEXT: add x9, sp, #32
-; CHECK-NEXT: add x12, sp, #360
-; CHECK-NEXT: ld1 { v3.b }[2], [x9]
+; CHECK-NEXT: ld1 { v2.b }[1], [x10]
+; CHECK-NEXT: ld1 { v1.b }[2], [x9]
+; CHECK-NEXT: add x10, sp, #32
; CHECK-NEXT: add x11, sp, #112
-; CHECK-NEXT: add x10, sp, #120
-; CHECK-NEXT: ld1 { v1.b }[2], [x12]
-; CHECK-NEXT: add x12, sp, #168
-; CHECK-NEXT: ld1 { v0.b }[3], [x8]
; CHECK-NEXT: mov v4.b[2], w2
-; CHECK-NEXT: ld1 { v2.b }[3], [x12]
-; CHECK-NEXT: add x12, sp, #40
-; CHECK-NEXT: ld1 { v3.b }[3], [x12]
-; CHECK-NEXT: add x13, sp, #176
-; CHECK-NEXT: ldr b16, [sp, #216]
-; CHECK-NEXT: ld1 { v0.b }[4], [x11]
-; CHECK-NEXT: add x11, sp, #48
-; CHECK-NEXT: add x12, sp, #368
-; CHECK-NEXT: ld1 { v2.b }[4], [x13]
+; CHECK-NEXT: ld1 { v6.b }[2], [x10]
+; CHECK-NEXT: add x10, sp, #168
+; CHECK-NEXT: ld1 { v0.b }[3], [x8]
+; CHECK-NEXT: ldr b5, [sp, #216]
; CHECK-NEXT: add x13, sp, #224
-; CHECK-NEXT: add x9, sp, #128
+; CHECK-NEXT: ld1 { v1.b }[3], [x10]
+; CHECK-NEXT: add x10, sp, #40
+; CHECK-NEXT: add x12, sp, #120
+; CHECK-NEXT: ld1 { v6.b }[3], [x10]
+; CHECK-NEXT: add x10, sp, #176
+; CHECK-NEXT: ld1 { v5.b }[1], [x13]
; CHECK-NEXT: mov v4.b[3], w3
-; CHECK-NEXT: ld1 { v3.b }[4], [x11]
-; CHECK-NEXT: ld1 { v16.b }[1], [x13]
-; CHECK-NEXT: ld1 { v0.b }[5], [x10]
-; CHECK-NEXT: add x10, sp, #56
-; CHECK-NEXT: ld1 { v1.b }[3], [x12]
-; CHECK-NEXT: add x12, sp, #184
-; CHECK-NEXT: ldr b5, [sp, #280]
-; CHECK-NEXT: add x11, sp, #376
-; CHECK-NEXT: ld1 { v3.b }[5], [x10]
-; CHECK-NEXT: ld1 { v2.b }[5], [x12]
-; CHECK-NEXT: add x10, sp, #232
+; CHECK-NEXT: ld1 { v0.b }[4], [x11]
+; CHECK-NEXT: add x11, sp, #48
+; CHECK-NEXT: add x8, sp, #360
+; CHECK-NEXT: ld1 { v1.b }[4], [x10]
+; CHECK-NEXT: add x13, sp, #56
+; CHECK-NEXT: ld1 { v6.b }[4], [x11]
+; CHECK-NEXT: ldr b7, [sp, #280]
+; CHECK-NEXT: ld1 { v2.b }[2], [x8]
+; CHECK-NEXT: add x15, sp, #232
+; CHECK-NEXT: ld1 { v0.b }[5], [x12]
+; CHECK-NEXT: add x14, sp, #184
; CHECK-NEXT: mov v4.b[4], w4
+; CHECK-NEXT: ld1 { v5.b }[2], [x15]
+; CHECK-NEXT: add x9, sp, #128
+; CHECK-NEXT: ld1 { v6.b }[5], [x13]
+; CHECK-NEXT: add x13, sp, #288
+; CHECK-NEXT: add x10, sp, #368
+; CHECK-NEXT: ld1 { v7.b }[1], [x13]
+; CHECK-NEXT: ld1 { v1.b }[5], [x14]
+; CHECK-NEXT: ld1 { v2.b }[3], [x10]
+; CHECK-NEXT: add x15, sp, #240
; CHECK-NEXT: ld1 { v0.b }[6], [x9]
-; CHECK-NEXT: add x9, sp, #288
-; CHECK-NEXT: add x15, sp, #64
-; CHECK-NEXT: ld1 { v16.b }[2], [x10]
-; CHECK-NEXT: ldr b17, [sp, #408]
-; CHECK-NEXT: ld1 { v5.b }[1], [x9]
-; CHECK-NEXT: add x14, sp, #192
-; CHECK-NEXT: ld1 { v1.b }[4], [x11]
-; CHECK-NEXT: ld1 { v3.b }[6], [x15]
-; CHECK-NEXT: add x15, sp, #416
-; CHECK-NEXT: ld1 { v2.b }[6], [x14]
-; CHECK-NEXT: add x14, sp, #240
-; CHECK-NEXT: ld1 { v17.b }[1], [x15]
; CHECK-NEXT: add x9, sp, #296
-; CHECK-NEXT: add x8, sp, #136
; CHECK-NEXT: mov v4.b[5], w5
-; CHECK-NEXT: add x13, sp, #384
-; CHECK-NEXT: ld1 { v16.b }[3], [x14]
-; CHECK-NEXT: ld1 { v5.b }[2], [x9]
-; CHECK-NEXT: ld1 { v1.b }[5], [x13]
-; CHECK-NEXT: ld1 { v0.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #424
-; CHECK-NEXT: add x9, sp, #248
-; CHECK-NEXT: ld1 { v17.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #304
-; CHECK-NEXT: add x10, sp, #392
-; CHECK-NEXT: ld1 { v16.b }[4], [x9]
-; CHECK-NEXT: ld1 { v5.b }[3], [x8]
+; CHECK-NEXT: add x11, sp, #192
+; CHECK-NEXT: ld1 { v5.b }[3], [x15]
+; CHECK-NEXT: ldr b3, [sp, #408]
+; CHECK-NEXT: ld1 { v7.b }[2], [x9]
+; CHECK-NEXT: add x12, sp, #64
+; CHECK-NEXT: add x13, sp, #376
+; CHECK-NEXT: ld1 { v1.b }[6], [x11]
+; CHECK-NEXT: add x11, sp, #416
+; CHECK-NEXT: ld1 { v6.b }[6], [x12]
+; CHECK-NEXT: add x12, sp, #248
+; CHECK-NEXT: ld1 { v3.b }[1], [x11]
; CHECK-NEXT: mov v4.b[6], w6
-; CHECK-NEXT: ld1 { v1.b }[6], [x10]
-; CHECK-NEXT: add x10, sp, #432
-; CHECK-NEXT: add x9, sp, #256
-; CHECK-NEXT: ld1 { v17.b }[3], [x10]
-; CHECK-NEXT: add x10, sp, #312
-; CHECK-NEXT: ldr b22, [sp, #608]
-; CHECK-NEXT: add x8, sp, #400
-; CHECK-NEXT: ld1 { v16.b }[5], [x9]
-; CHECK-NEXT: ld1 { v5.b }[4], [x10]
-; CHECK-NEXT: add x9, sp, #616
-; CHECK-NEXT: ld1 { v1.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #440
-; CHECK-NEXT: ld1 { v22.b }[1], [x9]
+; CHECK-NEXT: ld1 { v2.b }[4], [x13]
+; CHECK-NEXT: add x11, sp, #304
+; CHECK-NEXT: ld1 { v5.b }[4], [x12]
+; CHECK-NEXT: ld1 { v7.b }[3], [x11]
+; CHECK-NEXT: add x8, sp, #136
+; CHECK-NEXT: add x15, sp, #384
+; CHECK-NEXT: add x9, sp, #424
+; CHECK-NEXT: ld1 { v0.b }[7], [x8]
+; CHECK-NEXT: ld1 { v3.b }[2], [x9]
+; CHECK-NEXT: ld1 { v2.b }[5], [x15]
+; CHECK-NEXT: add x8, sp, #312
; CHECK-NEXT: mov v4.b[7], w7
-; CHECK-NEXT: ld1 { v17.b }[4], [x8]
+; CHECK-NEXT: add x9, sp, #256
+; CHECK-NEXT: add x10, sp, #200
+; CHECK-NEXT: ld1 { v7.b }[4], [x8]
+; CHECK-NEXT: ld1 { v5.b }[5], [x9]
+; CHECK-NEXT: add x14, sp, #72
+; CHECK-NEXT: ld1 { v1.b }[7], [x10]
+; CHECK-NEXT: add x10, sp, #432
+; CHECK-NEXT: add x8, sp, #392
+; CHECK-NEXT: ld1 { v6.b }[7], [x14]
+; CHECK-NEXT: ld1 { v3.b }[3], [x10]
+; CHECK-NEXT: ld1 { v2.b }[6], [x8]
; CHECK-NEXT: add x8, sp, #320
+; CHECK-NEXT: add x9, sp, #264
+; CHECK-NEXT: sshll v21.8h, v4.8b, #0
+; CHECK-NEXT: ldr b4, [sp, #208]
+; CHECK-NEXT: ld1 { v7.b }[5], [x8]
+; CHECK-NEXT: ld1 { v5.b }[6], [x9]
+; CHECK-NEXT: add x10, sp, #440
+; CHECK-NEXT: add x8, sp, #400
+; CHECK-NEXT: sshll v16.8h, v6.8b, #0
+; CHECK-NEXT: sshll v6.8h, v4.8b, #0
+; CHECK-NEXT: ld1 { v3.b }[4], [x10]
+; CHECK-NEXT: ld1 { v2.b }[7], [x8]
+; CHECK-NEXT: add x8, sp, #272
+; CHECK-NEXT: add x9, sp, #328
+; CHECK-NEXT: ldr b4, [sp, #608]
+; CHECK-NEXT: ld1 { v7.b }[6], [x9]
+; CHECK-NEXT: ld1 { v5.b }[7], [x8]
+; CHECK-NEXT: add x8, sp, #616
; CHECK-NEXT: add x10, sp, #448
-; CHECK-NEXT: ldr b6, [sp, #208]
-; CHECK-NEXT: ld1 { v5.b }[5], [x8]
-; CHECK-NEXT: add x8, sp, #624
-; CHECK-NEXT: ldr b7, [sp, #472]
-; CHECK-NEXT: ld1 { v22.b }[2], [x8]
-; CHECK-NEXT: ld1 { v17.b }[5], [x10]
-; CHECK-NEXT: add x10, sp, #328
-; CHECK-NEXT: sshll v20.8h, v4.8b, #0
-; CHECK-NEXT: ldr b4, [sp, #480]
+; CHECK-NEXT: ld1 { v4.b }[1], [x8]
+; CHECK-NEXT: ldr b18, [sp, #480]
+; CHECK-NEXT: ld1 { v3.b }[5], [x10]
+; CHECK-NEXT: add x9, sp, #336
+; CHECK-NEXT: ldr b17, [sp, #472]
+; CHECK-NEXT: add x8, sp, #488
+; CHECK-NEXT: ld1 { v7.b }[7], [x9]
+; CHECK-NEXT: add x9, sp, #624
+; CHECK-NEXT: ld1 { v18.b }[1], [x8]
+; CHECK-NEXT: sshll v22.8h, v5.8b, #0
; CHECK-NEXT: add x8, sp, #456
-; CHECK-NEXT: ld1 { v5.b }[6], [x10]
-; CHECK-NEXT: add x10, sp, #632
-; CHECK-NEXT: sshll v6.8h, v6.8b, #0
-; CHECK-NEXT: ld1 { v22.b }[3], [x10]
-; CHECK-NEXT: add x10, sp, #488
-; CHECK-NEXT: ld1 { v17.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #336
-; CHECK-NEXT: ld1 { v4.b }[1], [x10]
-; CHECK-NEXT: sshll v7.8h, v7.8b, #0
-; CHECK-NEXT: ld1 { v5.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #640
-; CHECK-NEXT: add x9, sp, #264
-; CHECK-NEXT: ld1 { v22.b }[4], [x8]
+; CHECK-NEXT: sshll v5.8h, v17.8b, #0
+; CHECK-NEXT: ld1 { v4.b }[2], [x9]
+; CHECK-NEXT: ld1 { v3.b }[6], [x8]
; CHECK-NEXT: add x8, sp, #496
-; CHECK-NEXT: ld1 { v16.b }[6], [x9]
-; CHECK-NEXT: ld1 { v4.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #648
-; CHECK-NEXT: smull v18.4s, v6.4h, v7.4h
-; CHECK-NEXT: ldr b7, [sp, #544]
-; CHECK-NEXT: add x9, sp, #272
-; CHECK-NEXT: movi v6.2d, #0000000000000000
-; CHECK-NEXT: ld1 { v22.b }[5], [x8]
+; CHECK-NEXT: sshll v17.8h, v7.8b, #0
+; CHECK-NEXT: add x10, sp, #632
+; CHECK-NEXT: ld1 { v18.b }[2], [x8]
+; CHECK-NEXT: add x9, sp, #464
; CHECK-NEXT: add x8, sp, #504
-; CHECK-NEXT: ld1 { v16.b }[7], [x9]
-; CHECK-NEXT: ld1 { v4.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #552
-; CHECK-NEXT: add x9, sp, #656
-; CHECK-NEXT: ld1 { v7.b }[1], [x8]
+; CHECK-NEXT: smull v19.4s, v6.4h, v5.4h
+; CHECK-NEXT: movi v5.2d, #0000000000000000
+; CHECK-NEXT: ld1 { v4.b }[3], [x10]
+; CHECK-NEXT: ld1 { v3.b }[7], [x9]
+; CHECK-NEXT: smull v6.4s, v16.4h, v17.4h
+; CHECK-NEXT: add x9, sp, #640
+; CHECK-NEXT: ld1 { v18.b }[3], [x8]
+; CHECK-NEXT: smull2 v16.4s, v16.8h, v17.8h
+; CHECK-NEXT: ldr b17, [sp, #672]
+; CHECK-NEXT: ld1 { v4.b }[4], [x9]
+; CHECK-NEXT: add x9, sp, #680
+; CHECK-NEXT: ldr b20, [sp, #544]
+; CHECK-NEXT: mov v5.s[0], v19.s[0]
; CHECK-NEXT: add x8, sp, #512
-; CHECK-NEXT: ldr b21, [sp, #672]
-; CHECK-NEXT: ld1 { v22.b }[6], [x9]
-; CHECK-NEXT: mov v6.s[0], v18.s[0]
-; CHECK-NEXT: add x9, sp, #664
-; CHECK-NEXT: ld1 { v4.b }[4], [x8]
-; CHECK-NEXT: add x8, sp, #560
-; CHECK-NEXT: sshll v23.8h, v16.8b, #0
-; CHECK-NEXT: ld1 { v7.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #520
-; CHECK-NEXT: movi v19.2d, #0000000000000000
-; CHECK-NEXT: ld1 { v22.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #528
-; CHECK-NEXT: add x10, sp, #464
-; CHECK-NEXT: ld1 { v4.b }[5], [x8]
-; CHECK-NEXT: add x8, sp, #568
-; CHECK-NEXT: smull2 v18.4s, v20.8h, v23.8h
-; CHECK-NEXT: ld1 { v7.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #680
-; CHECK-NEXT: smlal v6.4s, v20.4h, v23.4h
-; CHECK-NEXT: ld1 { v21.b }[1], [x8]
-; CHECK-NEXT: sshll v20.8h, v22.8b, #0
-; CHECK-NEXT: ldr b22, [sp, #736]
-; CHECK-NEXT: ld1 { v4.b }[6], [x9]
-; CHECK-NEXT: add x9, sp, #576
-; CHECK-NEXT: ldr b23, [sp, #1000]
-; CHECK-NEXT: ld1 { v7.b }[4], [x9]
-; CHECK-NEXT: add x9, sp, #688
-; CHECK-NEXT: sshll v24.8h, v22.8b, #0
-; CHECK-NEXT: ld1 { v21.b }[2], [x9]
+; CHECK-NEXT: ld1 { v17.b }[1], [x9]
+; CHECK-NEXT: add x11, sp, #552
+; CHECK-NEXT: add x10, sp, #648
+; CHECK-NEXT: ld1 { v18.b }[4], [x8]
+; CHECK-NEXT: ld1 { v20.b }[1], [x11]
+; CHECK-NEXT: ld1 { v4.b }[5], [x10]
+; CHECK-NEXT: add x10, sp, #688
+; CHECK-NEXT: add x9, sp, #520
+; CHECK-NEXT: ld1 { v17.b }[2], [x10]
+; CHECK-NEXT: add x10, sp, #560
+; CHECK-NEXT: smull2 v7.4s, v21.8h, v22.8h
+; CHECK-NEXT: ld1 { v18.b }[5], [x9]
+; CHECK-NEXT: smlal v5.4s, v21.4h, v22.4h
+; CHECK-NEXT: ld1 { v20.b }[2], [x10]
+; CHECK-NEXT: ldr b21, [sp, #736]
+; CHECK-NEXT: ldr b22, [sp, #1000]
+; CHECK-NEXT: add x8, sp, #656
; CHECK-NEXT: add x9, sp, #696
-; CHECK-NEXT: sshll v25.8h, v23.8b, #0
-; CHECK-NEXT: add x8, sp, #536
-; CHECK-NEXT: ldr b22, [sp, #872]
-; CHECK-NEXT: ldr b23, [sp, #936]
-; CHECK-NEXT: ld1 { v4.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #584
-; CHECK-NEXT: ld1 { v17.b }[7], [x10]
-; CHECK-NEXT: ld1 { v21.b }[3], [x9]
-; CHECK-NEXT: ld1 { v7.b }[5], [x8]
-; CHECK-NEXT: add x8, sp, #880
-; CHECK-NEXT: add x9, sp, #704
-; CHECK-NEXT: smull v25.4s, v24.4h, v25.4h
-; CHECK-NEXT: ldr b24, [sp, #744]
-; CHECK-NEXT: ld1 { v22.b }[1], [x8]
-; CHECK-NEXT: add x8, sp, #944
-; CHECK-NEXT: add x10, sp, #888
-; CHECK-NEXT: ld1 { v21.b }[4], [x9]
-; CHECK-NEXT: add x9, sp, #752
-; CHECK-NEXT: ld1 { v23.b }[1], [x8]
-; CHECK-NEXT: ld1 { v24.b }[1], [x9]
-; CHECK-NEXT: add x8, sp, #712
+; CHECK-NEXT: add x11, sp, #568
+; CHECK-NEXT: ld1 { v4.b }[6], [x8]
+; CHECK-NEXT: add x8, sp, #528
+; CHECK-NEXT: ld1 { v17.b }[3], [x9]
+; CHECK-NEXT: sshll v21.8h, v21.8b, #0
+; CHECK-NEXT: sshll v24.8h, v22.8b, #0
+; CHECK-NEXT: ld1 { v18.b }[6], [x8]
+; CHECK-NEXT: ld1 { v20.b }[3], [x11]
+; CHECK-NEXT: add x10, sp, #704
+; CHECK-NEXT: ldr b23, [sp, #808]
+; CHECK-NEXT: movi v19.2d, #0000000000000000
+; CHECK-NEXT: add x9, sp, #536
+; CHECK-NEXT: ld1 { v17.b }[4], [x10]
+; CHECK-NEXT: add x10, sp, #576
+; CHECK-NEXT: ldr b22, [sp, #744]
+; CHECK-NEXT: add x11, sp, #816
+; CHECK-NEXT: smull v24.4s, v21.4h, v24.4h
+; CHECK-NEXT: ld1 { v18.b }[7], [x9]
+; CHECK-NEXT: ld1 { v20.b }[4], [x10]
+; CHECK-NEXT: add x10, sp, #752
+; CHECK-NEXT: ld1 { v23.b }[1], [x11]
+; CHECK-NEXT: add x9, sp, #712
+; CHECK-NEXT: ld1 { v22.b }[1], [x10]
+; CHECK-NEXT: ld1 { v17.b }[5], [x9]
+; CHECK-NEXT: add x9, sp, #584
+; CHECK-NEXT: add x10, sp, #824
+; CHECK-NEXT: sshll v21.8h, v18.8b, #0
+; CHECK-NEXT: ld1 { v20.b }[5], [x9]
; CHECK-NEXT: add x9, sp, #760
-; CHECK-NEXT: ld1 { v22.b }[2], [x10]
-; CHECK-NEXT: add x10, sp, #952
-; CHECK-NEXT: mov v19.s[0], v25.s[0]
-; CHECK-NEXT: ldr b25, [sp, #808]
+; CHECK-NEXT: ldr b18, [sp, #936]
; CHECK-NEXT: ld1 { v23.b }[2], [x10]
-; CHECK-NEXT: ld1 { v21.b }[5], [x8]
-; CHECK-NEXT: ld1 { v24.b }[2], [x9]
-; CHECK-NEXT: add x8, sp, #816
-; CHECK-NEXT: add x9, sp, #896
-; CHECK-NEXT: ld1 { v25.b }[1], [x8]
-; CHECK-NEXT: add x8, sp, #960
-; CHECK-NEXT: ld1 { v22.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #768
-; CHECK-NEXT: ld1 { v23.b }[3], [x8]
-; CHECK-NEXT: add x10, sp, #904
-; CHECK-NEXT: ld1 { v24.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #824
-; CHECK-NEXT: add x8, sp, #720
-; CHECK-NEXT: ld1 { v25.b }[2], [x9]
-; CHECK-NEXT: add x9, sp, #968
-; CHECK-NEXT: ld1 { v22.b }[4], [x10]
-; CHECK-NEXT: add x10, sp, #776
-; CHECK-NEXT: ld1 { v23.b }[4], [x9]
-; CHECK-NEXT: ld1 { v21.b }[6], [x8]
-; CHECK-NEXT: ld1 { v24.b }[4], [x10]
-; CHECK-NEXT: add x8, sp, #832
-; CHECK-NEXT: add x9, sp, #912
-; CHECK-NEXT: ld1 { v25.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #976
-; CHECK-NEXT: ld1 { v22.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #784
-; CHECK-NEXT: ld1 { v23.b }[5], [x8]
-; CHECK-NEXT: add x10, sp, #920
-; CHECK-NEXT: ld1 { v24.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #840
-; CHECK-NEXT: add x8, sp, #728
-; CHECK-NEXT: ld1 { v25.b }[4], [x9]
-; CHECK-NEXT: add x9, sp, #984
-; CHECK-NEXT: ld1 { v22.b }[6], [x10]
-; CHECK-NEXT: add x10, sp, #792
-; CHECK-NEXT: ld1 { v23.b }[6], [x9]
-; CHECK-NEXT: ld1 { v21.b }[7], [x8]
-; CHECK-NEXT: ld1 { v24.b }[6], [x10]
-; CHECK-NEXT: add x8, sp, #848
-; CHECK-NEXT: add x9, sp, #928
-; CHECK-NEXT: ld1 { v25.b }[5], [x8]
-; CHECK-NEXT: add x12, sp, #72
-; CHECK-NEXT: add x8, sp, #992
-; CHECK-NEXT: ld1 { v22.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #800
-; CHECK-NEXT: ld1 { v3.b }[7], [x12]
-; CHECK-NEXT: ld1 { v23.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #592
-; CHECK-NEXT: ld1 { v24.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #856
-; CHECK-NEXT: ld1 { v7.b }[6], [x8]
-; CHECK-NEXT: add x11, sp, #200
-; CHECK-NEXT: ld1 { v25.b }[6], [x9]
-; CHECK-NEXT: sshll v3.8h, v3.8b, #0
-; CHECK-NEXT: sshll v5.8h, v5.8b, #0
-; CHECK-NEXT: sshll v4.8h, v4.8b, #0
-; CHECK-NEXT: sshll v21.8h, v21.8b, #0
+; CHECK-NEXT: mov v19.s[0], v24.s[0]
+; CHECK-NEXT: ldr b24, [sp, #872]
+; CHECK-NEXT: ld1 { v22.b }[2], [x9]
+; CHECK-NEXT: add x9, sp, #944
+; CHECK-NEXT: add x11, sp, #880
+; CHECK-NEXT: add x10, sp, #768
+; CHECK-NEXT: ld1 { v18.b }[1], [x9]
+; CHECK-NEXT: add x9, sp, #832
+; CHECK-NEXT: ld1 { v24.b }[1], [x11]
+; CHECK-NEXT: ld1 { v23.b }[3], [x9]
+; CHECK-NEXT: ld1 { v22.b }[3], [x10]
+; CHECK-NEXT: add x10, sp, #952
+; CHECK-NEXT: add x12, sp, #888
+; CHECK-NEXT: add x9, sp, #592
+; CHECK-NEXT: add x11, sp, #776
+; CHECK-NEXT: ld1 { v18.b }[2], [x10]
+; CHECK-NEXT: add x10, sp, #840
+; CHECK-NEXT: ld1 { v24.b }[2], [x12]
+; CHECK-NEXT: ld1 { v23.b }[4], [x10]
+; CHECK-NEXT: ld1 { v22.b }[4], [x11]
+; CHECK-NEXT: ld1 { v20.b }[6], [x9]
+; CHECK-NEXT: add x9, sp, #960
+; CHECK-NEXT: add x11, sp, #896
+; CHECK-NEXT: add x10, sp, #784
+; CHECK-NEXT: ld1 { v18.b }[3], [x9]
+; CHECK-NEXT: add x9, sp, #848
+; CHECK-NEXT: ld1 { v24.b }[3], [x11]
+; CHECK-NEXT: ld1 { v23.b }[5], [x9]
+; CHECK-NEXT: ld1 { v22.b }[5], [x10]
+; CHECK-NEXT: add x10, sp, #968
+; CHECK-NEXT: add x12, sp, #904
+; CHECK-NEXT: add x9, sp, #600
+; CHECK-NEXT: add x11, sp, #792
+; CHECK-NEXT: ld1 { v18.b }[4], [x10]
+; CHECK-NEXT: add x10, sp, #856
+; CHECK-NEXT: ld1 { v24.b }[4], [x12]
+; CHECK-NEXT: ld1 { v23.b }[6], [x10]
+; CHECK-NEXT: ld1 { v22.b }[6], [x11]
+; CHECK-NEXT: ld1 { v20.b }[7], [x9]
+; CHECK-NEXT: add x9, sp, #976
+; CHECK-NEXT: add x11, sp, #912
+; CHECK-NEXT: add x10, sp, #800
+; CHECK-NEXT: ld1 { v18.b }[5], [x9]
+; CHECK-NEXT: add x9, sp, #864
+; CHECK-NEXT: ld1 { v24.b }[5], [x11]
+; CHECK-NEXT: ld1 { v23.b }[7], [x9]
+; CHECK-NEXT: add x9, sp, #720
+; CHECK-NEXT: ld1 { v22.b }[7], [x10]
+; CHECK-NEXT: add x10, sp, #984
+; CHECK-NEXT: ld1 { v17.b }[6], [x9]
+; CHECK-NEXT: add x9, sp, #920
+; CHECK-NEXT: ld1 { v18.b }[6], [x10]
+; CHECK-NEXT: ld1 { v24.b }[6], [x9]
+; CHECK-NEXT: add x10, sp, #728
+; CHECK-NEXT: add x8, sp, #664
+; CHECK-NEXT: sshll v20.8h, v20.8b, #0
; CHECK-NEXT: sshll v22.8h, v22.8b, #0
; CHECK-NEXT: sshll v23.8h, v23.8b, #0
-; CHECK-NEXT: add x8, sp, #600
-; CHECK-NEXT: sshll v24.8h, v24.8b, #0
-; CHECK-NEXT: add x9, sp, #864
-; CHECK-NEXT: ld1 { v2.b }[7], [x11]
-; CHECK-NEXT: ld1 { v7.b }[7], [x8]
-; CHECK-NEXT: ld1 { v25.b }[7], [x9]
-; CHECK-NEXT: smull v16.4s, v3.4h, v5.4h
-; CHECK-NEXT: smull2 v3.4s, v3.8h, v5.8h
-; CHECK-NEXT: smull v5.4s, v21.4h, v23.4h
-; CHECK-NEXT: smull2 v21.4s, v21.8h, v23.8h
-; CHECK-NEXT: smull2 v23.4s, v20.8h, v22.8h
-; CHECK-NEXT: smlal v19.4s, v4.4h, v24.4h
-; CHECK-NEXT: sshll v2.8h, v2.8b, #0
-; CHECK-NEXT: sshll v17.8h, v17.8b, #0
+; CHECK-NEXT: add x9, sp, #992
+; CHECK-NEXT: ld1 { v17.b }[7], [x10]
+; CHECK-NEXT: add x10, sp, #928
+; CHECK-NEXT: ld1 { v18.b }[7], [x9]
+; CHECK-NEXT: ld1 { v4.b }[7], [x8]
+; CHECK-NEXT: ld1 { v24.b }[7], [x10]
+; CHECK-NEXT: smlal v19.4s, v21.4h, v22.4h
+; CHECK-NEXT: smull2 v21.4s, v21.8h, v22.8h
+; CHECK-NEXT: smull v22.4s, v20.4h, v23.4h
+; CHECK-NEXT: smull2 v20.4s, v20.8h, v23.8h
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: sshll v7.8h, v7.8b, #0
-; CHECK-NEXT: sshll v25.8h, v25.8b, #0
-; CHECK-NEXT: smlal2 v3.4s, v2.8h, v17.8h
-; CHECK-NEXT: smlal v16.4s, v2.4h, v17.4h
-; CHECK-NEXT: smlal2 v23.4s, v4.8h, v24.8h
-; CHECK-NEXT: smlal2 v18.4s, v0.8h, v1.8h
-; CHECK-NEXT: smlal v6.4s, v0.4h, v1.4h
-; CHECK-NEXT: smlal v19.4s, v20.4h, v22.4h
-; CHECK-NEXT: smlal2 v21.4s, v7.8h, v25.8h
-; CHECK-NEXT: smlal v5.4s, v7.4h, v25.4h
-; CHECK-NEXT: add v0.4s, v18.4s, v3.4s
-; CHECK-NEXT: add v1.4s, v6.4s, v16.4s
-; CHECK-NEXT: add v2.4s, v23.4s, v21.4s
-; CHECK-NEXT: add v3.4s, v19.4s, v5.4s
+; CHECK-NEXT: sshll v3.8h, v3.8b, #0
+; CHECK-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-NEXT: sshll v17.8h, v17.8b, #0
+; CHECK-NEXT: sshll v18.8h, v18.8b, #0
+; CHECK-NEXT: sshll v4.8h, v4.8b, #0
+; CHECK-NEXT: sshll v23.8h, v24.8b, #0
+; CHECK-NEXT: smlal2 v16.4s, v1.8h, v3.8h
+; CHECK-NEXT: smlal v6.4s, v1.4h, v3.4h
+; CHECK-NEXT: smlal2 v7.4s, v0.8h, v2.8h
+; CHECK-NEXT: smlal v5.4s, v0.4h, v2.4h
+; CHECK-NEXT: smlal2 v20.4s, v17.8h, v18.8h
+; CHECK-NEXT: smlal v22.4s, v17.4h, v18.4h
+; CHECK-NEXT: smlal2 v21.4s, v4.8h, v23.8h
+; CHECK-NEXT: smlal v19.4s, v4.4h, v23.4h
+; CHECK-NEXT: add v0.4s, v7.4s, v16.4s
+; CHECK-NEXT: add v1.4s, v5.4s, v6.4s
+; CHECK-NEXT: add v2.4s, v21.4s, v20.4s
+; CHECK-NEXT: add v3.4s, v19.4s, v22.4s
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: add v1.4s, v3.4s, v2.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
@@ -2050,10 +2050,10 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %
; CHECK-NEXT: ld1 { v3.b }[2], [x10]
; CHECK-NEXT: ld1 { v5.b }[2], [x8]
; CHECK-NEXT: add x8, sp, #176
-; CHECK-NEXT: ldr b6, [sp, #544]
+; CHECK-NEXT: ldr b6, [sp, #672]
; CHECK-NEXT: ld1 { v0.b }[4], [x12]
-; CHECK-NEXT: add x14, sp, #552
-; CHECK-NEXT: ldr b7, [sp, #672]
+; CHECK-NEXT: add x14, sp, #680
+; CHECK-NEXT: ldr b7, [sp, #544]
; CHECK-NEXT: ld1 { v2.b }[4], [x8]
; CHECK-NEXT: add x13, sp, #40
; CHECK-NEXT: ld1 { v6.b }[1], [x14]
@@ -2061,7 +2061,7 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %
; CHECK-NEXT: add x11, sp, #128
; CHECK-NEXT: ld1 { v3.b }[3], [x13]
; CHECK-NEXT: ld1 { v0.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #680
+; CHECK-NEXT: add x9, sp, #552
; CHECK-NEXT: add x13, sp, #184
; CHECK-NEXT: ld1 { v7.b }[1], [x9]
; CHECK-NEXT: ld1 { v2.b }[5], [x13]
@@ -2070,26 +2070,26 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %
; CHECK-NEXT: ld1 { v4.b }[2], [x13]
; CHECK-NEXT: add x10, sp, #136
; CHECK-NEXT: ld1 { v0.b }[6], [x11]
-; CHECK-NEXT: add x11, sp, #560
+; CHECK-NEXT: add x11, sp, #688
; CHECK-NEXT: ld1 { v5.b }[3], [x15]
; CHECK-NEXT: ld1 { v6.b }[2], [x11]
-; CHECK-NEXT: add x11, sp, #688
+; CHECK-NEXT: add x11, sp, #560
; CHECK-NEXT: mov v1.b[3], w3
; CHECK-NEXT: ld1 { v7.b }[2], [x11]
; CHECK-NEXT: add x9, sp, #632
; CHECK-NEXT: add x11, sp, #512
; CHECK-NEXT: ld1 { v0.b }[7], [x10]
; CHECK-NEXT: ld1 { v4.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #568
-; CHECK-NEXT: add x10, sp, #696
+; CHECK-NEXT: add x9, sp, #696
+; CHECK-NEXT: add x10, sp, #568
; CHECK-NEXT: ld1 { v6.b }[3], [x9]
; CHECK-NEXT: ld1 { v5.b }[4], [x11]
; CHECK-NEXT: ld1 { v7.b }[3], [x10]
; CHECK-NEXT: add x9, sp, #640
; CHECK-NEXT: mov v1.b[4], w4
; CHECK-NEXT: ld1 { v4.b }[4], [x9]
-; CHECK-NEXT: add x9, sp, #576
-; CHECK-NEXT: add x10, sp, #704
+; CHECK-NEXT: add x9, sp, #704
+; CHECK-NEXT: add x10, sp, #576
; CHECK-NEXT: add x11, sp, #520
; CHECK-NEXT: ld1 { v6.b }[4], [x9]
; CHECK-NEXT: ldr b18, [sp, #736]
@@ -2101,8 +2101,8 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %
; CHECK-NEXT: add x9, sp, #648
; CHECK-NEXT: ld1 { v3.b }[4], [x8]
; CHECK-NEXT: add x10, sp, #528
-; CHECK-NEXT: add x11, sp, #584
-; CHECK-NEXT: add x12, sp, #712
+; CHECK-NEXT: add x11, sp, #712
+; CHECK-NEXT: add x12, sp, #584
; CHECK-NEXT: sshll v18.8h, v18.8b, #0
; CHECK-NEXT: mov v1.b[5], w5
; CHECK-NEXT: ld1 { v6.b }[5], [x11]
@@ -2114,8 +2114,8 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %
; CHECK-NEXT: ld1 { v3.b }[5], [x14]
; CHECK-NEXT: add x9, sp, #656
; CHECK-NEXT: add x10, sp, #536
-; CHECK-NEXT: add x11, sp, #592
-; CHECK-NEXT: add x12, sp, #720
+; CHECK-NEXT: add x11, sp, #720
+; CHECK-NEXT: add x12, sp, #592
; CHECK-NEXT: sshll v18.4s, v18.4h, #0
; CHECK-NEXT: ldr b16, [sp, #208]
; CHECK-NEXT: ld1 { v6.b }[6], [x11]
@@ -2127,8 +2127,8 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %
; CHECK-NEXT: sshll v16.8h, v16.8b, #0
; CHECK-NEXT: ld1 { v3.b }[6], [x8]
; CHECK-NEXT: add x8, sp, #664
-; CHECK-NEXT: add x9, sp, #600
-; CHECK-NEXT: add x10, sp, #728
+; CHECK-NEXT: add x9, sp, #728
+; CHECK-NEXT: add x10, sp, #600
; CHECK-NEXT: mov v17.s[0], v18.s[0]
; CHECK-NEXT: ld1 { v6.b }[7], [x9]
; CHECK-NEXT: ld1 { v7.b }[7], [x10]
@@ -2151,7 +2151,7 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %
; CHECK-NEXT: sshll v2.8h, v2.8b, #0
; CHECK-NEXT: sshll v3.8h, v3.8b, #0
; CHECK-NEXT: saddl2 v16.4s, v7.8h, v6.8h
-; CHECK-NEXT: saddl2 v5.4s, v4.8h, v5.8h
+; CHECK-NEXT: saddl2 v5.4s, v5.8h, v4.8h
; CHECK-NEXT: saddl v6.4s, v7.4h, v6.4h
; CHECK-NEXT: saddw v4.4s, v17.4s, v4.4h
; CHECK-NEXT: saddl2 v17.4s, v1.8h, v0.8h
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 364e8c7b38dac..42ea425f99c0a 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1843,3 +1843,152 @@ define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind {
%r = or i8 %a, 240
ret i8 %r
}
+
+define i64 @muland_demand(i64 %x) nounwind {
+; RV32I-LABEL: muland_demand:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a0, a0, -8
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: srli a1, a1, 2
+; RV32I-NEXT: li a2, 12
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __muldi3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IM-LABEL: muland_demand:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: andi a0, a0, -8
+; RV32IM-NEXT: li a2, 12
+; RV32IM-NEXT: mul a1, a1, a2
+; RV32IM-NEXT: mulhu a3, a0, a2
+; RV32IM-NEXT: add a1, a3, a1
+; RV32IM-NEXT: mul a0, a0, a2
+; RV32IM-NEXT: ret
+;
+; RV64I-LABEL: muland_demand:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, -29
+; RV64I-NEXT: srli a1, a1, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: li a1, 12
+; RV64I-NEXT: tail __muldi3
+;
+; RV64IM-LABEL: muland_demand:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: andi a0, a0, -8
+; RV64IM-NEXT: li a1, 12
+; RV64IM-NEXT: mul a0, a0, a1
+; RV64IM-NEXT: ret
+ %and = and i64 %x, 4611686018427387896
+ %mul = mul i64 %and, 12
+ ret i64 %mul
+}
+
+define i64 @mulzext_demand(i32 signext %x) nounwind {
+; RV32I-LABEL: mulzext_demand:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __muldi3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IM-LABEL: mulzext_demand:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: slli a1, a0, 1
+; RV32IM-NEXT: add a1, a1, a0
+; RV32IM-NEXT: li a0, 0
+; RV32IM-NEXT: ret
+;
+; RV64I-LABEL: mulzext_demand:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 3
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: tail __muldi3
+;
+; RV64IM-LABEL: mulzext_demand:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: li a1, 3
+; RV64IM-NEXT: slli a1, a1, 32
+; RV64IM-NEXT: mul a0, a0, a1
+; RV64IM-NEXT: ret
+ %ext = zext i32 %x to i64
+ %mul = mul i64 %ext, 12884901888
+ ret i64 %mul
+}
+
+define i32 @mulfshl_demand(i32 signext %x) nounwind {
+; RV32I-LABEL: mulfshl_demand:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a0, a0, 11
+; RV32I-NEXT: lui a1, 92808
+; RV32I-NEXT: tail __mulsi3
+;
+; RV32IM-LABEL: mulfshl_demand:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: srli a0, a0, 11
+; RV32IM-NEXT: lui a1, 92808
+; RV32IM-NEXT: mul a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64I-LABEL: mulfshl_demand:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srliw a0, a0, 11
+; RV64I-NEXT: lui a1, 92808
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IM-LABEL: mulfshl_demand:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: srliw a0, a0, 11
+; RV64IM-NEXT: lui a1, 92808
+; RV64IM-NEXT: mulw a0, a0, a1
+; RV64IM-NEXT: ret
+ %fshl = tail call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 21)
+ %mul = mul i32 %fshl, 380141568
+ ret i32 %mul
+}
+
+define i32 @mulor_demand(i32 signext %x, i32 signext %y) nounwind {
+; RV32I-LABEL: mulor_demand:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 92808
+; RV32I-NEXT: tail __mulsi3
+;
+; RV32IM-LABEL: mulor_demand:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: lui a1, 92808
+; RV32IM-NEXT: mul a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64I-LABEL: mulor_demand:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a1, 92808
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IM-LABEL: mulor_demand:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: lui a1, 92808
+; RV64IM-NEXT: mulw a0, a0, a1
+; RV64IM-NEXT: ret
+ %mul1 = mul i32 %y, 10485760
+ %or = or disjoint i32 %mul1, %x
+ %mul2 = mul i32 %or, 380141568
+ ret i32 %mul2
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
index 2db0d40b0ce52..cf7be57ccc901 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
@@ -637,8 +637,6 @@ define i64 @zext_mul288(i32 signext %a) {
define i64 @zext_mul12884901888(i32 signext %a) {
; RV64I-LABEL: zext_mul12884901888:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: li a1, 3
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: mul a0, a0, a1
@@ -646,8 +644,8 @@ define i64 @zext_mul12884901888(i32 signext %a) {
;
; RV64ZBA-LABEL: zext_mul12884901888:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: sh1add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: ret
%b = zext i32 %a to i64
%c = mul i64 %b, 12884901888
@@ -658,8 +656,6 @@ define i64 @zext_mul12884901888(i32 signext %a) {
define i64 @zext_mul21474836480(i32 signext %a) {
; RV64I-LABEL: zext_mul21474836480:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: li a1, 5
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: mul a0, a0, a1
@@ -667,8 +663,8 @@ define i64 @zext_mul21474836480(i32 signext %a) {
;
; RV64ZBA-LABEL: zext_mul21474836480:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: ret
%b = zext i32 %a to i64
%c = mul i64 %b, 21474836480
@@ -679,8 +675,6 @@ define i64 @zext_mul21474836480(i32 signext %a) {
define i64 @zext_mul38654705664(i32 signext %a) {
; RV64I-LABEL: zext_mul38654705664:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: li a1, 9
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: mul a0, a0, a1
@@ -688,8 +682,8 @@ define i64 @zext_mul38654705664(i32 signext %a) {
;
; RV64ZBA-LABEL: zext_mul38654705664:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: sh3add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: ret
%b = zext i32 %a to i64
%c = mul i64 %b, 38654705664
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index dc93c0215a25c..4a568fb2b25c8 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -856,8 +856,6 @@ define i64 @zext_mul288(i32 signext %a) {
define i64 @zext_mul12884901888(i32 signext %a) {
; RV64I-LABEL: zext_mul12884901888:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: li a1, 3
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: mul a0, a0, a1
@@ -865,8 +863,8 @@ define i64 @zext_mul12884901888(i32 signext %a) {
;
; RV64ZBA-LABEL: zext_mul12884901888:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: sh1add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: ret
%b = zext i32 %a to i64
%c = mul i64 %b, 12884901888
@@ -877,8 +875,6 @@ define i64 @zext_mul12884901888(i32 signext %a) {
define i64 @zext_mul21474836480(i32 signext %a) {
; RV64I-LABEL: zext_mul21474836480:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: li a1, 5
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: mul a0, a0, a1
@@ -886,8 +882,8 @@ define i64 @zext_mul21474836480(i32 signext %a) {
;
; RV64ZBA-LABEL: zext_mul21474836480:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: ret
%b = zext i32 %a to i64
%c = mul i64 %b, 21474836480
@@ -898,8 +894,6 @@ define i64 @zext_mul21474836480(i32 signext %a) {
define i64 @zext_mul38654705664(i32 signext %a) {
; RV64I-LABEL: zext_mul38654705664:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: li a1, 9
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: mul a0, a0, a1
@@ -907,8 +901,8 @@ define i64 @zext_mul38654705664(i32 signext %a) {
;
; RV64ZBA-LABEL: zext_mul38654705664:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: sh3add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 32
; RV64ZBA-NEXT: ret
%b = zext i32 %a to i64
%c = mul i64 %b, 38654705664
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index f707cb31e3ece..8cf78551d28f9 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1047,25 +1047,25 @@ define signext i32 @bug(i32 signext %x) {
; CHECK-NEXT: seqz a2, a2
; CHECK-NEXT: slli a3, a2, 3
; CHECK-NEXT: sllw a1, a1, a3
-; CHECK-NEXT: neg a2, a2
+; CHECK-NEXT: negw a2, a2
; CHECK-NEXT: andi a2, a2, -8
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: srliw a2, a1, 28
; CHECK-NEXT: seqz a2, a2
; CHECK-NEXT: slli a3, a2, 2
; CHECK-NEXT: sllw a1, a1, a3
-; CHECK-NEXT: neg a2, a2
+; CHECK-NEXT: negw a2, a2
; CHECK-NEXT: andi a2, a2, -4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: srliw a2, a1, 30
; CHECK-NEXT: seqz a2, a2
; CHECK-NEXT: slli a3, a2, 1
; CHECK-NEXT: sllw a1, a1, a3
-; CHECK-NEXT: neg a2, a2
+; CHECK-NEXT: negw a2, a2
; CHECK-NEXT: andi a2, a2, -2
; CHECK-NEXT: add a0, a0, a2
-; CHECK-NEXT: srai a1, a1, 31
; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: srli a1, a1, 31
; CHECK-NEXT: addw a0, a0, a1
; CHECK-NEXT: .LBB18_4: # %cleanup
; CHECK-NEXT: ret
@@ -1087,28 +1087,27 @@ define signext i32 @bug(i32 signext %x) {
; NOREMOVAL-NEXT: seqz a2, a2
; NOREMOVAL-NEXT: slli a3, a2, 3
; NOREMOVAL-NEXT: sllw a1, a1, a3
-; NOREMOVAL-NEXT: neg a2, a2
+; NOREMOVAL-NEXT: negw a2, a2
; NOREMOVAL-NEXT: andi a2, a2, -8
; NOREMOVAL-NEXT: add a0, a0, a2
; NOREMOVAL-NEXT: srliw a2, a1, 28
; NOREMOVAL-NEXT: seqz a2, a2
; NOREMOVAL-NEXT: slli a3, a2, 2
; NOREMOVAL-NEXT: sllw a1, a1, a3
-; NOREMOVAL-NEXT: neg a2, a2
+; NOREMOVAL-NEXT: negw a2, a2
; NOREMOVAL-NEXT: andi a2, a2, -4
; NOREMOVAL-NEXT: add a0, a0, a2
; NOREMOVAL-NEXT: srliw a2, a1, 30
; NOREMOVAL-NEXT: seqz a2, a2
; NOREMOVAL-NEXT: slli a3, a2, 1
; NOREMOVAL-NEXT: sllw a1, a1, a3
-; NOREMOVAL-NEXT: neg a2, a2
+; NOREMOVAL-NEXT: negw a2, a2
; NOREMOVAL-NEXT: andi a2, a2, -2
; NOREMOVAL-NEXT: add a0, a0, a2
-; NOREMOVAL-NEXT: srai a1, a1, 31
; NOREMOVAL-NEXT: not a1, a1
-; NOREMOVAL-NEXT: add a0, a0, a1
+; NOREMOVAL-NEXT: srli a1, a1, 31
+; NOREMOVAL-NEXT: addw a0, a0, a1
; NOREMOVAL-NEXT: .LBB18_4: # %cleanup
-; NOREMOVAL-NEXT: sext.w a0, a0
; NOREMOVAL-NEXT: ret
entry:
%tobool.not = icmp eq i32 %x, 0
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
index 83d7275358ce3..3300d46bf8561 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
@@ -130,26 +130,26 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vand q2, q2, q1
; CHECK-NEXT: vmov.u16 r3, q0[2]
-; CHECK-NEXT: vmov r0, r1, d5
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov.u16 r2, q0[3]
-; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
+; CHECK-NEXT: vmov r0, s10
+; CHECK-NEXT: vmov r1, r2, d4
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov.u16 r1, q0[3]
+; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vmov.u16 r3, q0[4]
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov r2, s10
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov.u16 r2, q0[5]
-; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
+; CHECK-NEXT: vmov r1, s8
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov r1, s10
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov.u16 r1, q0[5]
+; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov r2, r3, d5
-; CHECK-NEXT: adds r0, r0, r2
+; CHECK-NEXT: vmov r1, s8
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov r1, r3, d5
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: adc.w r1, r2, r3
; CHECK-NEXT: vmov.u16 r2, q0[7]
-; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u16 r3, q0[6]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: vand q0, q0, q1
@@ -228,8 +228,8 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r0, r1, d1
-; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov r2, r1, d0
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: bx lr
entry:
@@ -397,26 +397,26 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vand q2, q2, q1
; CHECK-NEXT: vmov.u8 r3, q0[2]
-; CHECK-NEXT: vmov r0, r1, d5
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov.u8 r2, q0[3]
-; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
+; CHECK-NEXT: vmov r0, s10
+; CHECK-NEXT: vmov r1, r2, d4
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov.u8 r1, q0[3]
+; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vmov.u8 r3, q0[4]
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov r2, s10
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov.u8 r2, q0[5]
-; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
+; CHECK-NEXT: vmov r1, s8
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov r1, s10
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov.u8 r1, q0[5]
+; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov r2, r3, d5
-; CHECK-NEXT: adds r0, r0, r2
+; CHECK-NEXT: vmov r1, s8
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov r1, r3, d5
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: adc.w r1, r2, r3
; CHECK-NEXT: vmov.u8 r2, q0[7]
-; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[6]
; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
; CHECK-NEXT: vand q2, q2, q1
@@ -540,26 +540,26 @@ define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x) {
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov.u16 r3, q0[2]
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r0, r1, d5
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov.u16 r2, q0[3]
-; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
+; CHECK-NEXT: vmov r0, s10
+; CHECK-NEXT: vmov r1, r2, d4
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov.u16 r1, q0[3]
+; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vmov.u16 r3, q0[4]
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov r2, s10
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov.u16 r2, q0[5]
-; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
+; CHECK-NEXT: vmov r1, s8
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov r1, s10
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov.u16 r1, q0[5]
+; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: vmov r2, r3, d5
-; CHECK-NEXT: adds r0, r0, r2
+; CHECK-NEXT: vmov r1, s8
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: vmov r1, r3, d5
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: adc.w r1, r2, r3
; CHECK-NEXT: vmov.u16 r2, q0[7]
-; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u16 r3, q0[6]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: vand q0, q0, q1
@@ -648,8 +648,8 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r0, r1, d1
-; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov r2, r1, d0
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: bx lr
entry:
@@ -834,8 +834,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, r12, d5
-; CHECK-NEXT: vmov r3, s8
+; CHECK-NEXT: vmov r2, s10
+; CHECK-NEXT: vmov r3, r12, d4
; CHECK-NEXT: add.w lr, r3, r2
; CHECK-NEXT: vmov.u16 r3, q0[3]
; CHECK-NEXT: vmov.u16 r2, q0[2]
@@ -943,8 +943,8 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q1, #0xffff
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r2, r12, d1
-; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r3, r12, d0
; CHECK-NEXT: add r2, r3
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r1, r1, r12
@@ -1130,8 +1130,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, r12, d5
-; CHECK-NEXT: vmov r3, s8
+; CHECK-NEXT: vmov r2, s10
+; CHECK-NEXT: vmov r3, r12, d4
; CHECK-NEXT: add.w lr, r3, r2
; CHECK-NEXT: vmov.u8 r3, q0[3]
; CHECK-NEXT: vmov.u8 r2, q0[2]
@@ -1283,8 +1283,8 @@ define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) {
; CHECK-NEXT: vmov.u16 r3, q0[0]
; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
; CHECK-NEXT: vand q2, q2, q1
-; CHECK-NEXT: vmov r2, r12, d5
-; CHECK-NEXT: vmov r3, s8
+; CHECK-NEXT: vmov r2, s10
+; CHECK-NEXT: vmov r3, r12, d4
; CHECK-NEXT: add.w lr, r3, r2
; CHECK-NEXT: vmov.u16 r3, q0[3]
; CHECK-NEXT: vmov.u16 r2, q0[2]
@@ -1402,8 +1402,8 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q1, #0xff
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r2, r12, d1
-; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r3, r12, d0
; CHECK-NEXT: add r2, r3
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r1, r1, r12
diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll
index 49ce2455ae8c7..4ed00a9d66bd3 100644
--- a/llvm/test/CodeGen/X86/combine-srem.ll
+++ b/llvm/test/CodeGen/X86/combine-srem.ll
@@ -329,7 +329,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $3, %xmm2
-; SSE-NEXT: psrad $1, %xmm1
+; SSE-NEXT: psrld $1, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -351,7 +351,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; AVX1-NEXT: vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
-; AVX1-NEXT: vpsrad $1, %xmm1, %xmm1
+; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
index dcded7a877abb..1f82c4a5a2d92 100644
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -1173,13 +1173,14 @@ define <4 x i32> @mul_v4i64_zero_lower(<4 x i32> %val1, <4 x i64> %val2) {
;
; SSE41-LABEL: mul_v4i64_zero_lower:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
; SSE41-NEXT: psrlq $32, %xmm2
-; SSE41-NEXT: pmuludq %xmm3, %xmm2
+; SSE41-NEXT: pmuludq %xmm0, %xmm2
; SSE41-NEXT: psrlq $32, %xmm1
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; SSE41-NEXT: pmuludq %xmm1, %xmm0
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; SSE41-NEXT: pmuludq %xmm1, %xmm3
+; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2]
+; SSE41-NEXT: movaps %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: mul_v4i64_zero_lower:
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 62051d1709940..f3f7f0515e306 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -1863,7 +1863,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl c, %edx
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: psrld $16, %xmm0
+; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: psllq $32, %xmm0
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
@@ -1884,7 +1884,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) {
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: movq c(%rip), %rax
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SSE-NEXT: psrld $16, %xmm0
+; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: psllq $32, %xmm0
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
More information about the llvm-commits
mailing list