[llvm] [LLVM][SelectionDAG] Don't legalise splat constants until required. (PR #143571)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 10:02:42 PDT 2025
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/143571
This PR is another step in the direction of enabling ConstantInt for vector types. The results look mostly positive to my untrained eye (NOTE: I'd like to ignore `test_compress_v1i32_with_sve` as being unrealistic, given that a single-element compress should be canonicalised to a select?)
The exception is X86, where I need some help. The change to `SelectionDAG::getConstant()` causes several X86 unit tests to hang. Upon inspection I traced this to `combineSelect` in X86ISelLowering.cpp, which inverts a `select` condition that DAGCombiner then undoes, so we continually bounce between the two states. I'm guessing we're just lucky this is not biting us already, and my `DCI.isBeforeLegalize()` addition only continues to hide the problem rather than fix it. Even with this change, the results show a couple of cases where the restriction leads to worse code. Are there any recommendations?
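To make the hang easier to picture, here is a minimal standalone sketch (plain C++, not LLVM code; all names in it are made up for illustration) of why two mutually inverse combines never reach a fixed point, and how gating one of them on a phase check, in the spirit of the `DCI.isBeforeLegalize()` addition, breaks the cycle:

```cpp
// Toy model, not LLVM code: two rewrites that are mutual inverses keep
// undoing each other unless one of them is gated on a phase flag.
#include <cstdio>

struct Select {
  bool CondInverted = false; // select(cc, a, b) vs. select(!cc, b, a)
};

// Models the target combine that inverts the select condition. "Allowed"
// stands in for a DCI.isBeforeLegalize()-style check.
bool invertCondition(Select &S, bool Allowed) {
  if (!Allowed || S.CondInverted)
    return false;
  S.CondInverted = true;
  return true;
}

// Models the generic fold that prefers the non-inverted form.
bool uninvertCondition(Select &S) {
  if (!S.CondInverted)
    return false;
  S.CondInverted = false;
  return true;
}

// Run both rewrites to a fixed point, or give up after a few rounds.
// Gated == true models running after legalisation with the new check in
// place, so the target combine stays quiet.
int runCombines(bool Gated) {
  Select S;
  for (int Iter = 1; Iter <= 10; ++Iter) {
    bool Changed = false;
    Changed |= invertCondition(S, /*Allowed=*/!Gated);
    Changed |= uninvertCondition(S);
    if (!Changed)
      return Iter; // stable: no rewrite fired this round
  }
  return -1; // the two rewrites kept ping-ponging
}

int main() {
  std::printf("gated target combine:   fixed point after round %d\n",
              runCombines(true));
  std::printf("ungated target combine: %d (no fixed point reached)\n",
              runCombines(false));
  return 0;
}
```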
From 26a6e901914877efedde6d9f8167ad88d379171b Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 14 Mar 2024 11:13:38 +0000
Subject: [PATCH 1/2] [LLVM][SelectionDAG] Don't legalise splat constants until
required.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 136 +++++------
.../CodeGen/AArch64/arm64-neon-mul-div-cte.ll | 12 +-
llvm/test/CodeGen/AArch64/srem-vector-lkk.ll | 66 ++---
llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 4 +-
llvm/test/CodeGen/AArch64/sve-expand-div.ll | 1 +
llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll | 1 +
.../CodeGen/AArch64/sve-vector-compress.ll | 5 +-
llvm/test/CodeGen/AArch64/urem-vector-lkk.ll | 4 +-
llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 4 +-
llvm/test/CodeGen/ARM/bool-ext-inc.ll | 5 +-
llvm/test/CodeGen/RISCV/rvv/combine-sats.ll | 4 +-
.../RISCV/rvv/fixed-vectors-extract.ll | 11 +-
.../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 47 ++--
.../RISCV/rvv/fixed-vectors-select-addsub.ll | 12 +-
.../RISCV/rvv/fixed-vectors-vsadd-vp.ll | 2 +-
.../RISCV/rvv/fixed-vectors-vssub-vp.ll | 2 +-
.../RISCV/rvv/fixed-vectors-zvqdotq.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll | 196 ++++++---------
llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll | 71 ++----
llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll | 226 ++++++------------
llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll | 2 +-
llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 2 +-
22 files changed, 319 insertions(+), 498 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4fc026ca562ba..1b9c28002b210 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1679,81 +1679,81 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
Elt = ConstantInt::get(*getContext(), Elt->getValue());
// In some cases the vector type is legal but the element type is illegal and
- // needs to be promoted, for example v8i8 on ARM. In this case, promote the
- // inserted value (the type does not need to match the vector element type).
- // Any extra bits introduced will be truncated away.
- if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
- TargetLowering::TypePromoteInteger) {
- EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal;
- if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
- NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
- else
- NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
- Elt = ConstantInt::get(*getContext(), NewVal);
- }
- // In other cases the element type is illegal and needs to be expanded, for
- // example v2i64 on MIPS32. In this case, find the nearest legal type, split
- // the value into n parts and use a vector type with n-times the elements.
- // Then bitcast to the type requested.
- // Legalizing constants too early makes the DAGCombiner's job harder so we
- // only legalize if the DAG tells us we must produce legal types.
- else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
- TLI->getTypeAction(*getContext(), EltVT) ==
- TargetLowering::TypeExpandInteger) {
- const APInt &NewVal = Elt->getValue();
- EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
-
- // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
- if (VT.isScalableVector() ||
- TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
- assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
- "Can only handle an even split!");
- unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
-
- SmallVector<SDValue, 2> ScalarParts;
- for (unsigned i = 0; i != Parts; ++i)
- ScalarParts.push_back(getConstant(
- NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
- ViaEltVT, isT, isO));
-
- return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts);
- }
+ // thus when necessary we "legalise" the constant here so as to simplify the
+ // job of calling this function. NOTE: Only legalize when necessary so that
+ // we don't make DAGCombiner's job harder.
+ if (NewNodesMustHaveLegalTypes && VT.isVector()) {
+ // Promote the inserted value (the type does not need to match the vector
+ // element type). Any extra bits introduced will be truncated away.
+ if (TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal;
+ if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
+ NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
+ else
+ NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+ // For expansion we find the nearest legal type, split the value into n
+ // parts and use a vector type with n-times the elements. Then bitcast to
+ // the type requested.
+ else if (TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypeExpandInteger) {
+ const APInt &NewVal = Elt->getValue();
+ EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+
+ // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
+ if (VT.isScalableVector() ||
+ TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
+ assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
+ "Can only handle an even split!");
+ unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
+
+ SmallVector<SDValue, 2> ScalarParts;
+ for (unsigned i = 0; i != Parts; ++i)
+ ScalarParts.push_back(getConstant(
+ NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
+
+ return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts);
+ }
- unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
- EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+ unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+ EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
- // Check the temporary vector is the correct size. If this fails then
- // getTypeToTransformTo() probably returned a type whose size (in bits)
- // isn't a power-of-2 factor of the requested type size.
- assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+ // Check the temporary vector is the correct size. If this fails then
+ // getTypeToTransformTo() probably returned a type whose size (in bits)
+ // isn't a power-of-2 factor of the requested type size.
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
- SmallVector<SDValue, 2> EltParts;
- for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i)
- EltParts.push_back(getConstant(
- NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
- ViaEltVT, isT, isO));
+ SmallVector<SDValue, 2> EltParts;
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i)
+ EltParts.push_back(getConstant(
+ NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
- // EltParts is currently in little endian order. If we actually want
- // big-endian order then reverse it now.
- if (getDataLayout().isBigEndian())
- std::reverse(EltParts.begin(), EltParts.end());
+ // EltParts is currently in little endian order. If we actually want
+ // big-endian order then reverse it now.
+ if (getDataLayout().isBigEndian())
+ std::reverse(EltParts.begin(), EltParts.end());
- // The elements must be reversed when the element order is different
- // to the endianness of the elements (because the BITCAST is itself a
- // vector shuffle in this situation). However, we do not need any code to
- // perform this reversal because getConstant() is producing a vector
- // splat.
- // This situation occurs in MIPS MSA.
+ // The elements must be reversed when the element order is different
+ // to the endianness of the elements (because the BITCAST is itself a
+ // vector shuffle in this situation). However, we do not need any code to
+ // perform this reversal because getConstant() is producing a vector
+ // splat.
+ // This situation occurs in MIPS MSA.
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- llvm::append_range(Ops, EltParts);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ llvm::append_range(Ops, EltParts);
- SDValue V =
- getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
- return V;
+ SDValue V =
+ getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+ return V;
+ }
}
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
index bdbebd8726fde..1be02ae602a3c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
@@ -8,9 +8,9 @@ define <16 x i8> @div16xi8(<16 x i8> %x) {
; CHECK-SD-NEXT: movi v1.16b, #41
; CHECK-SD-NEXT: smull2 v2.8h, v0.16b, v1.16b
; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2
-; CHECK-SD-NEXT: usra v0.16b, v0.16b, #7
+; CHECK-SD-NEXT: uzp2 v1.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: sshr v0.16b, v1.16b, #2
+; CHECK-SD-NEXT: usra v0.16b, v1.16b, #7
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: div16xi8:
@@ -78,9 +78,9 @@ define <8 x i16> @div8xi16(<8 x i16> %x) {
; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
-; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #12
-; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15
+; CHECK-SD-NEXT: add v1.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: sshr v0.8h, v1.8h, #12
+; CHECK-SD-NEXT: usra v0.8h, v1.8h, #15
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: div8xi16:
diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
index b165ac0d56d20..6c8ebc65a327c 100644
--- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
@@ -14,10 +14,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2]
; CHECK-NEXT: adrp x8, .LCPI0_3
-; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3]
-; CHECK-NEXT: usra v1.4h, v1.4h, #15
-; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
+; CHECK-NEXT: sshl v2.4h, v1.4h, v2.4h
+; CHECK-NEXT: usra v2.4h, v1.4h, #15
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_3]
+; CHECK-NEXT: mls v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
ret <4 x i16> %1
@@ -27,14 +27,14 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
; CHECK-LABEL: fold_srem_vec_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #44151 // =0xac77
-; CHECK-NEXT: movi v2.4h, #95
+; CHECK-NEXT: movi v3.4h, #95
; CHECK-NEXT: dup v1.4h, w8
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v0.4h
-; CHECK-NEXT: sshr v1.4h, v1.4h, #6
-; CHECK-NEXT: usra v1.4h, v1.4h, #15
-; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
+; CHECK-NEXT: sshr v2.4h, v1.4h, #6
+; CHECK-NEXT: usra v2.4h, v1.4h, #15
+; CHECK-NEXT: mls v0.4h, v2.4h, v3.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
ret <4 x i16> %1
@@ -46,15 +46,15 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
; CHECK-LABEL: combine_srem_sdiv:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #44151 // =0xac77
-; CHECK-NEXT: movi v2.4h, #95
+; CHECK-NEXT: movi v3.4h, #95
; CHECK-NEXT: dup v1.4h, w8
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v0.4h
-; CHECK-NEXT: sshr v1.4h, v1.4h, #6
-; CHECK-NEXT: usra v1.4h, v1.4h, #15
-; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: sshr v2.4h, v1.4h, #6
+; CHECK-NEXT: usra v2.4h, v1.4h, #15
+; CHECK-NEXT: mls v0.4h, v2.4h, v3.4h
+; CHECK-NEXT: add v0.4h, v0.4h, v2.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
%2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -74,10 +74,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v0.4h
-; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2]
-; CHECK-NEXT: usra v1.4h, v1.4h, #15
-; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
+; CHECK-NEXT: sshl v2.4h, v1.4h, v2.4h
+; CHECK-NEXT: usra v2.4h, v1.4h, #15
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_2]
+; CHECK-NEXT: mls v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
ret <4 x i16> %1
@@ -91,14 +91,14 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: adrp x8, .LCPI4_1
+; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1]
+; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: and v2.8b, v0.8b, v2.8b
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1]
-; CHECK-NEXT: adrp x8, .LCPI4_2
-; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ushr v2.4h, v1.4h, #15
+; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h
; CHECK-NEXT: mov v2.h[0], wzr
; CHECK-NEXT: add v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2]
@@ -118,12 +118,12 @@ define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) {
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: adrp x8, .LCPI5_2
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI5_2]
+; CHECK-NEXT: adrp x8, .LCPI5_3
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2]
-; CHECK-NEXT: adrp x8, .LCPI5_3
-; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ushr v2.4h, v1.4h, #15
+; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h
; CHECK-NEXT: mov v2.h[0], wzr
; CHECK-NEXT: add v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_3]
@@ -181,13 +181,13 @@ define <16 x i8> @fold_srem_v16i8(<16 x i8> %x) {
; CHECK-LABEL: fold_srem_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.16b, #103
+; CHECK-NEXT: movi v3.16b, #10
; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
; CHECK-NEXT: smull v1.8h, v0.8b, v1.8b
; CHECK-NEXT: uzp2 v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: movi v2.16b, #10
-; CHECK-NEXT: sshr v1.16b, v1.16b, #2
-; CHECK-NEXT: usra v1.16b, v1.16b, #7
-; CHECK-NEXT: mls v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: sshr v2.16b, v1.16b, #2
+; CHECK-NEXT: usra v2.16b, v1.16b, #7
+; CHECK-NEXT: mls v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%1 = srem <16 x i8> %x, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
ret <16 x i8> %1
@@ -199,8 +199,8 @@ define <8 x i8> @fold_srem_v8i8(<8 x i8> %x) {
; CHECK-NEXT: movi v1.8b, #103
; CHECK-NEXT: movi v2.8b, #10
; CHECK-NEXT: smull v1.8h, v0.8b, v1.8b
-; CHECK-NEXT: shrn v1.8b, v1.8h, #8
-; CHECK-NEXT: sshr v1.8b, v1.8b, #2
+; CHECK-NEXT: sshr v1.8h, v1.8h, #10
+; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: usra v1.8b, v1.8b, #7
; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
@@ -212,14 +212,14 @@ define <8 x i16> @fold_srem_v8i16(<8 x i16> %x) {
; CHECK-LABEL: fold_srem_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #26215 // =0x6667
+; CHECK-NEXT: movi v3.8h, #10
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
-; CHECK-NEXT: movi v2.8h, #10
-; CHECK-NEXT: sshr v1.8h, v1.8h, #2
-; CHECK-NEXT: usra v1.8h, v1.8h, #15
-; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: sshr v2.8h, v1.8h, #2
+; CHECK-NEXT: usra v2.8h, v1.8h, #15
+; CHECK-NEXT: mls v0.8h, v2.8h, v3.8h
; CHECK-NEXT: ret
%1 = srem <8 x i16> %x, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
ret <8 x i16> %1
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index 3af858713525b..7e95f61604620 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -356,9 +356,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.16b, #1
-; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/AArch64/sve-expand-div.ll b/llvm/test/CodeGen/AArch64/sve-expand-div.ll
index 180c64e0a7de1..bd6c72a3946c1 100644
--- a/llvm/test/CodeGen/AArch64/sve-expand-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-expand-div.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -use-constant-int-for-scalable-splat < %s | FileCheck %s
; Check that expensive divides are expanded into a more performant sequence
diff --git a/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll
index 4607f225f81ea..a799b51f15cb1 100644
--- a/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
+; RUN: llc -use-constant-int-for-scalable-splat < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index 8a504cd739211..944071b9d2161 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -176,10 +176,11 @@ define <1 x i32> @test_compress_v1i32_with_sve(<1 x i32> %vec, <1 x i1> %mask) {
; CHECK-LABEL: test_compress_v1i32_with_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: sbfx w8, w0, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: mov v1.s[0], w8
+; CHECK-NEXT: mov v1.s[0], w0
+; CHECK-NEXT: shl v1.2s, v1.2s, #31
+; CHECK-NEXT: cmlt v1.2s, v1.2s, #0
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: and z1.d, z1.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
index 468a33ce5bfcf..bd7952a7992c6 100644
--- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
@@ -186,8 +186,8 @@ define <8 x i8> @fold_urem_v8i8(<8 x i8> %x) {
; CHECK-NEXT: movi v1.8b, #205
; CHECK-NEXT: movi v2.8b, #10
; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b
-; CHECK-NEXT: shrn v1.8b, v1.8h, #8
-; CHECK-NEXT: ushr v1.8b, v1.8b, #3
+; CHECK-NEXT: ushr v1.8h, v1.8h, #11
+; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%1 = urem <8 x i8> %x, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index a71cf95a728db..34d9294ac7f3c 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -345,9 +345,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.16b, #1
-; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/ARM/bool-ext-inc.ll b/llvm/test/CodeGen/ARM/bool-ext-inc.ll
index 00a7fcdee3caa..80e89139389b1 100644
--- a/llvm/test/CodeGen/ARM/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/ARM/bool-ext-inc.ll
@@ -14,9 +14,8 @@ define i32 @sext_inc(i1 zeroext %x) {
define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
; CHECK-LABEL: sext_inc_vec:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i16 d16, #0x1
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: veor d16, d17, d16
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmvn d16, d16
; CHECK-NEXT: vmov.i32 q9, #0x1
; CHECK-NEXT: vmovl.u16 q8, d16
; CHECK-NEXT: vand q8, q8, q9
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
index 8f917becafec0..7484068e236ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
@@ -197,7 +197,7 @@ define <vscale x 2 x i16> @vselect_add_const_signbit_nxv2i16(<vscale x 2 x i16>
define <2 x i16> @vselect_xor_const_signbit_v2i16(<2 x i16> %a0) {
; CHECK-LABEL: vselect_xor_const_signbit_v2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a0
; CHECK-NEXT: ret
@@ -210,7 +210,7 @@ define <2 x i16> @vselect_xor_const_signbit_v2i16(<2 x i16> %a0) {
define <vscale x 2 x i16> @vselect_xor_const_signbit_nxv2i16(<vscale x 2 x i16> %a0) {
; CHECK-LABEL: vselect_xor_const_signbit_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index dba5d26c216fa..7a347ec4aacd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1345,18 +1345,9 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI46_0)
; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT: vle32.v v9, (a0)
-; RV64NOM-NEXT: lui a0, 1044480
-; RV64NOM-NEXT: vmv.s.x v10, a0
-; RV64NOM-NEXT: lui a0, 12320
-; RV64NOM-NEXT: addi a0, a0, 257
-; RV64NOM-NEXT: vsext.vf4 v11, v10
-; RV64NOM-NEXT: vand.vv v10, v8, v11
; RV64NOM-NEXT: vmulh.vv v8, v8, v9
-; RV64NOM-NEXT: vmv.s.x v9, a0
-; RV64NOM-NEXT: vadd.vv v8, v8, v10
-; RV64NOM-NEXT: vsext.vf4 v10, v9
-; RV64NOM-NEXT: vsra.vv v8, v8, v10
; RV64NOM-NEXT: vsrl.vi v9, v8, 31
+; RV64NOM-NEXT: vsra.vi v8, v8, 2
; RV64NOM-NEXT: vadd.vv v8, v8, v9
; RV64NOM-NEXT: vslidedown.vi v8, v8, 2
; RV64NOM-NEXT: vmv.x.s a0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 0c30cbe4a42ef..fba07787696a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1269,8 +1269,8 @@ define void @mulhs_v8i16(ptr %x) {
; CHECK-NEXT: addi a1, a1, 1755
; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT: vmulh.vv v8, v8, v9
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -1327,8 +1327,8 @@ define void @mulhs_v4i32(ptr %x) {
; RV64-NEXT: vmv.v.x v9, a1
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmulh.vv v8, v8, v9
-; RV64-NEXT: vsra.vi v8, v8, 1
; RV64-NEXT: vsrl.vi v9, v8, 31
+; RV64-NEXT: vsra.vi v8, v8, 1
; RV64-NEXT: vadd.vv v8, v8, v9
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
@@ -3410,8 +3410,8 @@ define void @mulhs_v16i16(ptr %x) {
; CHECK-NEXT: addi a1, a1, 1755
; CHECK-NEXT: vmerge.vxm v10, v10, a1, v0
; CHECK-NEXT: vmulh.vv v8, v8, v10
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v10, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -3451,8 +3451,8 @@ define void @mulhs_v8i32(ptr %x) {
; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vmulh.vv v8, v8, v10
-; RV64-NEXT: vsra.vi v8, v8, 1
; RV64-NEXT: vsrl.vi v10, v8, 31
+; RV64-NEXT: vsra.vi v8, v8, 1
; RV64-NEXT: vadd.vv v8, v8, v10
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
@@ -5564,8 +5564,8 @@ define void @mulhs_vx_v8i16(ptr %x) {
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, -1755
; CHECK-NEXT: vmulh.vx v8, v8, a1
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -5576,31 +5576,18 @@ define void @mulhs_vx_v8i16(ptr %x) {
}
define void @mulhs_vx_v4i32(ptr %x) {
-; RV32-LABEL: mulhs_vx_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: lui a1, 629146
-; RV32-NEXT: addi a1, a1, -1639
-; RV32-NEXT: vmulh.vx v8, v8, a1
-; RV32-NEXT: vsrl.vi v9, v8, 31
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhs_vx_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: lui a1, 629146
-; RV64-NEXT: addi a1, a1, -1639
-; RV64-NEXT: vmulh.vx v8, v8, a1
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v9, v8, 31
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhs_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: lui a1, 629146
+; CHECK-NEXT: addi a1, a1, -1639
+; CHECK-NEXT: vmulh.vx v8, v8, a1
+; CHECK-NEXT: vsrl.vi v9, v8, 31
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = sdiv <4 x i32> %a, <i32 -5, i32 -5, i32 -5, i32 -5>
store <4 x i32> %b, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll
index 22956f8fe3551..9d3fe3a90b463 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll
@@ -47,9 +47,9 @@ define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b)
define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_select_swapped:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmnot.m v0, v0
-; CHECK-NEXT: vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v9, 0
+; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%sub = sub <4 x i32> %a, %b
@@ -74,9 +74,9 @@ define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4
define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_both_swapped:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmnot.m v0, v0
-; CHECK-NEXT: vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v9, 0
+; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%sub = sub <4 x i32> %a, %b
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
index 7afd31fdd663c..8e85b1486bacb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
@@ -18,7 +18,7 @@ define <8 x i7> @vsadd_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vmin.vx v8, v8, a1, v0.t
-; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: li a0, -64
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%v = call <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
index 6ddf2e464750e..8430a2ebb7896 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
@@ -18,7 +18,7 @@ define <8 x i7> @vssub_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: vmin.vx v8, v8, a1, v0.t
-; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: li a0, -64
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%v = call <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
index a189711d11471..d579fb82de536 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
@@ -595,7 +595,7 @@ define <1 x i32> @vqdotu_vx_partial_reduce(<4 x i8> %a, <4 x i8> %b) {
; DOT: # %bb.0: # %entry
; DOT-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; DOT-NEXT: vmv.s.x v9, zero
-; DOT-NEXT: li a0, 128
+; DOT-NEXT: li a0, -128
; DOT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; DOT-NEXT: vmv.v.x v10, a0
; DOT-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -631,7 +631,7 @@ define <1 x i32> @vqdot_vx_partial_reduce(<4 x i8> %a, <4 x i8> %b) {
; DOT: # %bb.0: # %entry
; DOT-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; DOT-NEXT: vmv.s.x v9, zero
-; DOT-NEXT: li a0, 128
+; DOT-NEXT: li a0, -128
; DOT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; DOT-NEXT: vmv.v.x v10, a0
; DOT-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index de15e185998c4..489302c3ce722 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -33,8 +33,8 @@ define <vscale x 1 x i8> @vdiv_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v8, v9, v8
-; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vsrl.vi v9, v8, 7
+; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%vc = sdiv <vscale x 1 x i8> %va, splat (i8 -7)
@@ -90,8 +90,8 @@ define <vscale x 2 x i8> @vdiv_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v8, v9, v8
-; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vsrl.vi v9, v8, 7
+; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%vc = sdiv <vscale x 2 x i8> %va, splat (i8 -7)
@@ -127,8 +127,8 @@ define <vscale x 4 x i8> @vdiv_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v8, v9, v8
-; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vsrl.vi v9, v8, 7
+; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%vc = sdiv <vscale x 4 x i8> %va, splat (i8 -7)
@@ -164,8 +164,8 @@ define <vscale x 8 x i8> @vdiv_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v8, v9, v8
-; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vsrl.vi v9, v8, 7
+; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%vc = sdiv <vscale x 8 x i8> %va, splat (i8 -7)
@@ -201,8 +201,8 @@ define <vscale x 16 x i8> @vdiv_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmulh.vx v10, v8, a0
; CHECK-NEXT: vsub.vv v8, v10, v8
-; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vsrl.vi v10, v8, 7
+; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vadd.vv v8, v8, v10
; CHECK-NEXT: ret
%vc = sdiv <vscale x 16 x i8> %va, splat (i8 -7)
@@ -238,8 +238,8 @@ define <vscale x 32 x i8> @vdiv_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmulh.vx v12, v8, a0
; CHECK-NEXT: vsub.vv v8, v12, v8
-; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vsrl.vi v12, v8, 7
+; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: ret
%vc = sdiv <vscale x 32 x i8> %va, splat (i8 -7)
@@ -275,8 +275,8 @@ define <vscale x 64 x i8> @vdiv_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT: vmulh.vx v16, v8, a0
; CHECK-NEXT: vsub.vv v8, v16, v8
-; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vsrl.vi v16, v8, 7
+; CHECK-NEXT: vsra.vi v8, v8, 2
; CHECK-NEXT: vadd.vv v8, v8, v16
; CHECK-NEXT: ret
%vc = sdiv <vscale x 64 x i8> %va, splat (i8 -7)
@@ -312,8 +312,8 @@ define <vscale x 1 x i16> @vdiv_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a0
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%vc = sdiv <vscale x 1 x i16> %va, splat (i16 -7)
@@ -349,8 +349,8 @@ define <vscale x 2 x i16> @vdiv_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a0
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%vc = sdiv <vscale x 2 x i16> %va, splat (i16 -7)
@@ -386,8 +386,8 @@ define <vscale x 4 x i16> @vdiv_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a0
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
%vc = sdiv <vscale x 4 x i16> %va, splat (i16 -7)
@@ -423,8 +423,8 @@ define <vscale x 8 x i16> @vdiv_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a0
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v10, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v10
; CHECK-NEXT: ret
%vc = sdiv <vscale x 8 x i16> %va, splat (i16 -7)
@@ -460,8 +460,8 @@ define <vscale x 16 x i16> @vdiv_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a0
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v12, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: ret
%vc = sdiv <vscale x 16 x i16> %va, splat (i16 -7)
@@ -497,8 +497,8 @@ define <vscale x 32 x i16> @vdiv_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a0
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v16, v8, 15
+; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v16
; CHECK-NEXT: ret
%vc = sdiv <vscale x 32 x i16> %va, splat (i16 -7)
@@ -528,29 +528,17 @@ define <vscale x 1 x i32> @vdiv_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext %
}
define <vscale x 1 x i32> @vdiv_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vdiv_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulh.vx v9, v8, a0
-; RV32-NEXT: vsub.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 31
-; RV32-NEXT: vsra.vi v8, v8, 2
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulh.vx v9, v8, a0
-; RV64-NEXT: vsub.vv v8, v9, v8
-; RV64-NEXT: vsra.vi v8, v8, 2
-; RV64-NEXT: vsrl.vi v9, v8, 31
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a0
+; CHECK-NEXT: vsub.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 31
+; CHECK-NEXT: vsra.vi v8, v8, 2
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%vc = sdiv <vscale x 1 x i32> %va, splat (i32 -7)
ret <vscale x 1 x i32> %vc
}
@@ -578,29 +566,17 @@ define <vscale x 2 x i32> @vdiv_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext %
}
define <vscale x 2 x i32> @vdiv_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vdiv_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulh.vx v9, v8, a0
-; RV32-NEXT: vsub.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 31
-; RV32-NEXT: vsra.vi v8, v8, 2
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulh.vx v9, v8, a0
-; RV64-NEXT: vsub.vv v8, v9, v8
-; RV64-NEXT: vsra.vi v8, v8, 2
-; RV64-NEXT: vsrl.vi v9, v8, 31
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a0
+; CHECK-NEXT: vsub.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 31
+; CHECK-NEXT: vsra.vi v8, v8, 2
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%vc = sdiv <vscale x 2 x i32> %va, splat (i32 -7)
ret <vscale x 2 x i32> %vc
}
@@ -628,29 +604,17 @@ define <vscale x 4 x i32> @vdiv_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext %
}
define <vscale x 4 x i32> @vdiv_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vdiv_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulh.vx v10, v8, a0
-; RV32-NEXT: vsub.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 31
-; RV32-NEXT: vsra.vi v8, v8, 2
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulh.vx v10, v8, a0
-; RV64-NEXT: vsub.vv v8, v10, v8
-; RV64-NEXT: vsra.vi v8, v8, 2
-; RV64-NEXT: vsrl.vi v10, v8, 31
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulh.vx v10, v8, a0
+; CHECK-NEXT: vsub.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 31
+; CHECK-NEXT: vsra.vi v8, v8, 2
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
%vc = sdiv <vscale x 4 x i32> %va, splat (i32 -7)
ret <vscale x 4 x i32> %vc
}
@@ -678,29 +642,17 @@ define <vscale x 8 x i32> @vdiv_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext %
}
define <vscale x 8 x i32> @vdiv_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vdiv_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulh.vx v12, v8, a0
-; RV32-NEXT: vsub.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 31
-; RV32-NEXT: vsra.vi v8, v8, 2
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulh.vx v12, v8, a0
-; RV64-NEXT: vsub.vv v8, v12, v8
-; RV64-NEXT: vsra.vi v8, v8, 2
-; RV64-NEXT: vsrl.vi v12, v8, 31
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulh.vx v12, v8, a0
+; CHECK-NEXT: vsub.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 31
+; CHECK-NEXT: vsra.vi v8, v8, 2
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
%vc = sdiv <vscale x 8 x i32> %va, splat (i32 -7)
ret <vscale x 8 x i32> %vc
}
@@ -728,29 +680,17 @@ define <vscale x 16 x i32> @vdiv_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signex
}
define <vscale x 16 x i32> @vdiv_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
-; RV32-LABEL: vdiv_vi_nxv16i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vmulh.vx v16, v8, a0
-; RV32-NEXT: vsub.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 31
-; RV32-NEXT: vsra.vi v8, v8, 2
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv16i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vmulh.vx v16, v8, a0
-; RV64-NEXT: vsub.vv v8, v16, v8
-; RV64-NEXT: vsra.vi v8, v8, 2
-; RV64-NEXT: vsrl.vi v16, v8, 31
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv16i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmulh.vx v16, v8, a0
+; CHECK-NEXT: vsub.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 31
+; CHECK-NEXT: vsra.vi v8, v8, 2
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: ret
%vc = sdiv <vscale x 16 x i32> %va, splat (i32 -7)
ret <vscale x 16 x i32> %vc
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
index 3fd7f5be860cf..c0c9b1797f91f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
@@ -48,18 +48,11 @@ define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
}
define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_1(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv1i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv1i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv1i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 1 x i32> splat (i32 16) to <vscale x 1 x i64>
%vc = zext <vscale x 1 x i32> %va to <vscale x 1 x i64>
%vd = mul <vscale x 1 x i64> %vb, %vc
@@ -114,18 +107,11 @@ define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
}
define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_1(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv2i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv2i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv2i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 2 x i32> splat (i32 16) to <vscale x 2 x i64>
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = mul <vscale x 2 x i64> %vb, %vc
@@ -180,18 +166,11 @@ define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
}
define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_1(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv4i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv4i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv4i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 4 x i32> splat (i32 16) to <vscale x 4 x i64>
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = mul <vscale x 4 x i64> %vb, %vc
@@ -246,18 +225,11 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
}
define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv8i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv8i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv8i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 8 x i32> splat (i32 16) to <vscale x 8 x i64>
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = mul <vscale x 8 x i64> %vb, %vc
@@ -265,3 +237,6 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
%vf = trunc <vscale x 8 x i64> %ve to <vscale x 8 x i32>
ret <vscale x 8 x i32> %vf
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
index 9c21a626478e3..e3fad19bcb04c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -33,8 +33,8 @@ define <vscale x 1 x i8> @vrem_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v9, v9, v8
-; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vsrl.vi v10, v9, 7
+; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v9
@@ -85,8 +85,8 @@ define <vscale x 2 x i8> @vrem_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v9, v9, v8
-; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vsrl.vi v10, v9, 7
+; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v9
@@ -137,8 +137,8 @@ define <vscale x 4 x i8> @vrem_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v9, v9, v8
-; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vsrl.vi v10, v9, 7
+; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v9
@@ -189,8 +189,8 @@ define <vscale x 8 x i8> @vrem_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
; CHECK-NEXT: vsub.vv v9, v9, v8
-; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vsrl.vi v10, v9, 7
+; CHECK-NEXT: vsra.vi v9, v9, 2
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v9
@@ -241,8 +241,8 @@ define <vscale x 16 x i8> @vrem_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmulh.vx v10, v8, a0
; CHECK-NEXT: vsub.vv v10, v10, v8
-; CHECK-NEXT: vsra.vi v10, v10, 2
; CHECK-NEXT: vsrl.vi v12, v10, 7
+; CHECK-NEXT: vsra.vi v10, v10, 2
; CHECK-NEXT: vadd.vv v10, v10, v12
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v10
@@ -293,8 +293,8 @@ define <vscale x 32 x i8> @vrem_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmulh.vx v12, v8, a0
; CHECK-NEXT: vsub.vv v12, v12, v8
-; CHECK-NEXT: vsra.vi v12, v12, 2
; CHECK-NEXT: vsrl.vi v16, v12, 7
+; CHECK-NEXT: vsra.vi v12, v12, 2
; CHECK-NEXT: vadd.vv v12, v12, v16
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v12
@@ -345,8 +345,8 @@ define <vscale x 64 x i8> @vrem_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT: vmulh.vx v16, v8, a0
; CHECK-NEXT: vsub.vv v16, v16, v8
-; CHECK-NEXT: vsra.vi v16, v16, 2
; CHECK-NEXT: vsrl.vi v24, v16, 7
+; CHECK-NEXT: vsra.vi v16, v16, 2
; CHECK-NEXT: vadd.vv v16, v16, v24
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v16
@@ -384,8 +384,8 @@ define <vscale x 1 x i16> @vrem_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
-; CHECK-NEXT: vsra.vi v9, v9, 1
; CHECK-NEXT: vsrl.vi v10, v9, 15
+; CHECK-NEXT: vsra.vi v9, v9, 1
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v9
@@ -436,8 +436,8 @@ define <vscale x 2 x i16> @vrem_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
-; CHECK-NEXT: vsra.vi v9, v9, 1
; CHECK-NEXT: vsrl.vi v10, v9, 15
+; CHECK-NEXT: vsra.vi v9, v9, 1
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v9
@@ -488,8 +488,8 @@ define <vscale x 4 x i16> @vrem_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a0
-; CHECK-NEXT: vsra.vi v9, v9, 1
; CHECK-NEXT: vsrl.vi v10, v9, 15
+; CHECK-NEXT: vsra.vi v9, v9, 1
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v9
@@ -540,8 +540,8 @@ define <vscale x 8 x i16> @vrem_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vmulh.vx v10, v8, a0
-; CHECK-NEXT: vsra.vi v10, v10, 1
; CHECK-NEXT: vsrl.vi v12, v10, 15
+; CHECK-NEXT: vsra.vi v10, v10, 1
; CHECK-NEXT: vadd.vv v10, v10, v12
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v10
@@ -592,8 +592,8 @@ define <vscale x 16 x i16> @vrem_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vmulh.vx v12, v8, a0
-; CHECK-NEXT: vsra.vi v12, v12, 1
; CHECK-NEXT: vsrl.vi v16, v12, 15
+; CHECK-NEXT: vsra.vi v12, v12, 1
; CHECK-NEXT: vadd.vv v12, v12, v16
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v12
@@ -644,8 +644,8 @@ define <vscale x 32 x i16> @vrem_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-NEXT: addi a0, a0, 1755
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT: vmulh.vx v16, v8, a0
-; CHECK-NEXT: vsra.vi v16, v16, 1
; CHECK-NEXT: vsrl.vi v24, v16, 15
+; CHECK-NEXT: vsra.vi v16, v16, 1
; CHECK-NEXT: vadd.vv v16, v16, v24
; CHECK-NEXT: li a0, -7
; CHECK-NEXT: vnmsac.vx v8, a0, v16
@@ -677,33 +677,19 @@ define <vscale x 1 x i32> @vrem_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext %
}
define <vscale x 1 x i32> @vrem_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vrem_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulh.vx v9, v8, a0
-; RV32-NEXT: vsub.vv v9, v9, v8
-; RV32-NEXT: vsrl.vi v10, v9, 31
-; RV32-NEXT: vsra.vi v9, v9, 2
-; RV32-NEXT: vadd.vv v9, v9, v10
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulh.vx v9, v8, a0
-; RV64-NEXT: vsub.vv v9, v9, v8
-; RV64-NEXT: vsra.vi v9, v9, 2
-; RV64-NEXT: vsrl.vi v10, v9, 31
-; RV64-NEXT: vadd.vv v9, v9, v10
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a0
+; CHECK-NEXT: vsub.vv v9, v9, v8
+; CHECK-NEXT: vsrl.vi v10, v9, 31
+; CHECK-NEXT: vsra.vi v9, v9, 2
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%vc = srem <vscale x 1 x i32> %va, splat (i32 -7)
ret <vscale x 1 x i32> %vc
}
@@ -731,33 +717,19 @@ define <vscale x 2 x i32> @vrem_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext %
}
define <vscale x 2 x i32> @vrem_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vrem_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulh.vx v9, v8, a0
-; RV32-NEXT: vsub.vv v9, v9, v8
-; RV32-NEXT: vsrl.vi v10, v9, 31
-; RV32-NEXT: vsra.vi v9, v9, 2
-; RV32-NEXT: vadd.vv v9, v9, v10
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulh.vx v9, v8, a0
-; RV64-NEXT: vsub.vv v9, v9, v8
-; RV64-NEXT: vsra.vi v9, v9, 2
-; RV64-NEXT: vsrl.vi v10, v9, 31
-; RV64-NEXT: vadd.vv v9, v9, v10
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a0
+; CHECK-NEXT: vsub.vv v9, v9, v8
+; CHECK-NEXT: vsrl.vi v10, v9, 31
+; CHECK-NEXT: vsra.vi v9, v9, 2
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%vc = srem <vscale x 2 x i32> %va, splat (i32 -7)
ret <vscale x 2 x i32> %vc
}
@@ -785,33 +757,19 @@ define <vscale x 4 x i32> @vrem_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext %
}
define <vscale x 4 x i32> @vrem_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vrem_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulh.vx v10, v8, a0
-; RV32-NEXT: vsub.vv v10, v10, v8
-; RV32-NEXT: vsrl.vi v12, v10, 31
-; RV32-NEXT: vsra.vi v10, v10, 2
-; RV32-NEXT: vadd.vv v10, v10, v12
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulh.vx v10, v8, a0
-; RV64-NEXT: vsub.vv v10, v10, v8
-; RV64-NEXT: vsra.vi v10, v10, 2
-; RV64-NEXT: vsrl.vi v12, v10, 31
-; RV64-NEXT: vadd.vv v10, v10, v12
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulh.vx v10, v8, a0
+; CHECK-NEXT: vsub.vv v10, v10, v8
+; CHECK-NEXT: vsrl.vi v12, v10, 31
+; CHECK-NEXT: vsra.vi v10, v10, 2
+; CHECK-NEXT: vadd.vv v10, v10, v12
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v10
+; CHECK-NEXT: ret
%vc = srem <vscale x 4 x i32> %va, splat (i32 -7)
ret <vscale x 4 x i32> %vc
}
@@ -839,33 +797,19 @@ define <vscale x 8 x i32> @vrem_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext %
}
define <vscale x 8 x i32> @vrem_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vrem_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulh.vx v12, v8, a0
-; RV32-NEXT: vsub.vv v12, v12, v8
-; RV32-NEXT: vsrl.vi v16, v12, 31
-; RV32-NEXT: vsra.vi v12, v12, 2
-; RV32-NEXT: vadd.vv v12, v12, v16
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulh.vx v12, v8, a0
-; RV64-NEXT: vsub.vv v12, v12, v8
-; RV64-NEXT: vsra.vi v12, v12, 2
-; RV64-NEXT: vsrl.vi v16, v12, 31
-; RV64-NEXT: vadd.vv v12, v12, v16
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulh.vx v12, v8, a0
+; CHECK-NEXT: vsub.vv v12, v12, v8
+; CHECK-NEXT: vsrl.vi v16, v12, 31
+; CHECK-NEXT: vsra.vi v12, v12, 2
+; CHECK-NEXT: vadd.vv v12, v12, v16
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v12
+; CHECK-NEXT: ret
%vc = srem <vscale x 8 x i32> %va, splat (i32 -7)
ret <vscale x 8 x i32> %vc
}
@@ -893,33 +837,19 @@ define <vscale x 16 x i32> @vrem_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signex
}
define <vscale x 16 x i32> @vrem_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
-; RV32-LABEL: vrem_vi_nxv16i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 449390
-; RV32-NEXT: addi a0, a0, -1171
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vmulh.vx v16, v8, a0
-; RV32-NEXT: vsub.vv v16, v16, v8
-; RV32-NEXT: vsrl.vi v24, v16, 31
-; RV32-NEXT: vsra.vi v16, v16, 2
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv16i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addi a0, a0, -1171
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vmulh.vx v16, v8, a0
-; RV64-NEXT: vsub.vv v16, v16, v8
-; RV64-NEXT: vsra.vi v16, v16, 2
-; RV64-NEXT: vsrl.vi v24, v16, 31
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv16i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addi a0, a0, -1171
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmulh.vx v16, v8, a0
+; CHECK-NEXT: vsub.vv v16, v16, v8
+; CHECK-NEXT: vsrl.vi v24, v16, 31
+; CHECK-NEXT: vsra.vi v16, v16, 2
+; CHECK-NEXT: vadd.vv v16, v16, v24
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
+; CHECK-NEXT: ret
%vc = srem <vscale x 16 x i32> %va, splat (i32 -7)
ret <vscale x 16 x i32> %vc
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
index e471f4b2e92b5..65f847f562d75 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
@@ -15,7 +15,7 @@ define <vscale x 8 x i7> @vsadd_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
-; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: li a0, -64
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
index ebf8d5eeb40bc..b98da42697753 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
@@ -15,7 +15,7 @@ define <vscale x 8 x i7> @vssub_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <
; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
-; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: li a0, -64
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
>From fc99e91d581d758eb680e1eebd56c0984e498db3 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 6 Jun 2025 16:59:26 +0000
Subject: [PATCH 2/2] fix X86 DAGCombiner hang
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +++--
llvm/test/CodeGen/X86/avx512-select.ll | 12 ++++++++----
llvm/test/CodeGen/X86/vselect-zero.ll | 3 ++-
llvm/test/CodeGen/X86/x86-interleaved-access.ll | 13 +++++++++----
4 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bf5ba25cd3104..3ac53b63b64e5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48121,8 +48121,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Check if the first operand is all zeros and Cond type is vXi1.
// If this an avx512 target we can improve the use of zero masking by
// swapping the operands and inverting the condition.
- if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
- Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
+ if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
+ Cond.hasOneUse() && Subtarget.hasAVX512() &&
+ CondVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
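
For reference, a minimal standalone C++ sketch (not LLVM code; the Select struct, the rule names and runCombines are all hypothetical) of the combine ping-pong the `!DCI.isBeforeLegalize()` guard above is intended to break: one rule inverts the select condition, the other undoes it, so an unguarded combine loop never reaches a fixed point.

// Toy model only: mimics the shape of the problem, not the real DAG combines.
#include <iostream>

struct Select {
  bool CondInverted = false; // stands in for "condition wrapped in a NOT"
  bool ZeroOnTrue = true;    // stands in for "which operand is the zero vector"
};

// Mimics the target combine: prefer the zero vector as the false operand by
// swapping the operands and inverting the condition (gated before legalisation).
static bool targetCombine(Select &S, bool BeforeLegalize) {
  if (BeforeLegalize || !S.ZeroOnTrue)
    return false;
  S.ZeroOnTrue = false;
  S.CondInverted = !S.CondInverted;
  return true;
}

// Mimics the generic combine: fold away the inverted condition by swapping
// the operands back.
static bool genericCombine(Select &S) {
  if (!S.CondInverted)
    return false;
  S.CondInverted = false;
  S.ZeroOnTrue = !S.ZeroOnTrue;
  return true;
}

// Runs both rules to a fixed point, giving up after Limit rounds.
static int runCombines(bool BeforeLegalize, int Limit = 100) {
  Select S;
  int Rounds = 0;
  bool Changed = true;
  while (Changed && Rounds < Limit) {
    Changed = targetCombine(S, BeforeLegalize);
    Changed |= genericCombine(S);
    ++Rounds;
  }
  return Rounds;
}

int main() {
  std::cout << "guarded:   " << runCombines(/*BeforeLegalize=*/true)
            << " round(s)\n"; // converges immediately
  std::cout << "unguarded: " << runCombines(/*BeforeLegalize=*/false)
            << " round(s)\n"; // hits the 100-round cap: the ping-pong
  return 0;
}
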
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 721ffbe1ceb79..c30688e9fb33d 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -743,7 +743,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
; X86-AVX512F-LABEL: julia_issue36955:
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X86-AVX512F-NEXT: vcmplepd %zmm0, %zmm1, %k0
+; X86-AVX512F-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
+; X86-AVX512F-NEXT: knotw %k0, %k0
; X86-AVX512F-NEXT: kmovw %k0, %eax
; X86-AVX512F-NEXT: # kill: def $al killed $al killed $eax
; X86-AVX512F-NEXT: vzeroupper
@@ -752,7 +753,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
; X64-AVX512F-LABEL: julia_issue36955:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X64-AVX512F-NEXT: vcmplepd %zmm0, %zmm1, %k0
+; X64-AVX512F-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
+; X64-AVX512F-NEXT: knotw %k0, %k0
; X64-AVX512F-NEXT: kmovw %k0, %eax
; X64-AVX512F-NEXT: # kill: def $al killed $al killed $eax
; X64-AVX512F-NEXT: vzeroupper
@@ -761,7 +763,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
; X86-AVX512BW-LABEL: julia_issue36955:
; X86-AVX512BW: # %bb.0:
; X86-AVX512BW-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X86-AVX512BW-NEXT: vcmplepd %zmm0, %zmm1, %k0
+; X86-AVX512BW-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
+; X86-AVX512BW-NEXT: knotw %k0, %k0
; X86-AVX512BW-NEXT: kmovd %k0, %eax
; X86-AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; X86-AVX512BW-NEXT: vzeroupper
@@ -770,7 +773,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
; X64-AVX512BW-LABEL: julia_issue36955:
; X64-AVX512BW: # %bb.0:
; X64-AVX512BW-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X64-AVX512BW-NEXT: vcmplepd %zmm0, %zmm1, %k0
+; X64-AVX512BW-NEXT: vcmpnlepd %zmm0, %zmm1, %k0
+; X64-AVX512BW-NEXT: knotw %k0, %k0
; X64-AVX512BW-NEXT: kmovd %k0, %eax
; X64-AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; X64-AVX512BW-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll
index b3bb01137c70d..9a72bdb1d41f4 100644
--- a/llvm/test/CodeGen/X86/vselect-zero.ll
+++ b/llvm/test/CodeGen/X86/vselect-zero.ll
@@ -56,7 +56,8 @@ define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %x) {
;
; AVX512-LABEL: test2:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k1
+; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; AVX512-NEXT: knotw %k0, %k1
; AVX512-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%cond = fcmp oeq <4 x float> %a, %b
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 7cddebdca5cca..d73fd876649fa 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -478,7 +478,8 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(ptr %ptr) nounwind {
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpcmpeqb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpxor %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: interleaved_load_vf16_i8_stride4:
@@ -517,7 +518,8 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(ptr %ptr) nounwind {
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; AVX2-NEXT: vpcmpeqb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -633,7 +635,9 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(ptr %ptr) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm9, %ymm8, %ymm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm0, %ymm2, %ymm0
-; AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: interleaved_load_vf32_i8_stride4:
@@ -698,7 +702,8 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(ptr %ptr) nounwind {
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: vpcmpeqb %ymm0, %ymm6, %ymm0
; AVX2-NEXT: vpxor %ymm0, %ymm5, %ymm0
-; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: interleaved_load_vf32_i8_stride4: