[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 1 00:03:06 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Pengcheng Wang (wangpc-pp)
---
Patch is 404.53 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/114517.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+100-3)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+5)
- (modified) llvm/test/CodeGen/RISCV/memcmp-optsize.ll (+920-530)
- (modified) llvm/test/CodeGen/RISCV/memcmp.ll (+4570-1843)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b3f8772a08940..89b4f22a1260db 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
return true;
}
+/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
+/// recognizable memcmp expansion.
+static bool isOrXorXorTree(SDValue X, bool Root = true) {
+ if (X.getOpcode() == ISD::OR)
+ return isOrXorXorTree(X.getOperand(0), false) &&
+ isOrXorXorTree(X.getOperand(1), false);
+ if (Root)
+ return false;
+ return X.getOpcode() == ISD::XOR;
+}
+
+/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
+/// expansion.
+static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
+ EVT VecVT, EVT CmpVT) {
+ SDValue Op0 = X.getOperand(0);
+ SDValue Op1 = X.getOperand(1);
+ if (X.getOpcode() == ISD::OR) {
+ SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT);
+ SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT);
+ if (VecVT != CmpVT)
+ return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
+ return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
+ }
+ if (X.getOpcode() == ISD::XOR) {
+ SDValue A = DAG.getBitcast(VecVT, Op0);
+ SDValue B = DAG.getBitcast(VecVT, Op1);
+ if (VecVT != CmpVT)
+ return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
+ return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+ }
+ llvm_unreachable("Impossible");
+}
+
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue
+combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
+ const SDLoc &DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+ EVT OpVT = X.getValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+ unsigned OpSize = OpVT.getSizeInBits();
+
+ // We're looking for an oversized integer equality comparison.
+ if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
+ OpSize < Subtarget.getRealMinVLen() ||
+ OpSize > Subtarget.getRealMinVLen() * 8)
+ return SDValue();
+
+ bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
+ if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+ return SDValue();
+
+ // Don't perform this combine if constructing the vector will be expensive.
+ auto IsVectorBitCastCheap = [](SDValue X) {
+ X = peekThroughBitcasts(X);
+ return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
+ X.getOpcode() == ISD::LOAD;
+ };
+ if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
+ !IsOrXorXorTreeCCZero)
+ return SDValue();
+
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!NoImplicitFloatOps && Subtarget.hasVInstructions()) {
+ unsigned VecSize = OpSize / 8;
+ EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize);
+ EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize);
+
+ SDValue Cmp;
+ if (IsOrXorXorTreeCCZero) {
+ Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT);
+ } else {
+ SDValue VecX = DAG.getBitcast(VecVT, X);
+ SDValue VecY = DAG.getBitcast(VecVT, Y);
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
+ }
+ return DAG.getSetCC(DL, VT,
+ DAG.getNode(ISD::VECREDUCE_AND, DL, XLenVT, Cmp),
+ DAG.getConstant(0, DL, XLenVT), CC);
+ }
+
+ return SDValue();
+}
+
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+ SDLoc dl(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // Looking for an equality compare.
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
+ if (SDValue V = combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG,
+ Subtarget))
+ return V;
+ }
+
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
@@ -14499,8 +14599,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
return SDValue();
- // Looking for an equality compare.
- ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (!isIntEqualitySetCC(Cond))
return SDValue();
@@ -14512,7 +14610,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
const APInt &C1 = N1C->getAPIntValue();
- SDLoc dl(N);
// If the constant is larger than 2^32 - 1 it is impossible for both sides
// to be equal.
if (C1.getActiveBits() > 32)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 5f5a18e2868730..d7b05001185f32 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2504,5 +2504,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
Options.LoadSizes = {8, 4, 2, 1};
else
Options.LoadSizes = {4, 2, 1};
+ if (IsZeroCmp && ST->hasVInstructions()) {
+ unsigned RealMinVLen = ST->getRealMinVLen() / 8;
+ for (int LMUL = 1; LMUL <= 8; LMUL *= 2)
+ Options.LoadSizes.insert(Options.LoadSizes.begin(), RealMinVLen * LMUL);
+ }
return Options;
}
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index 06fb88b02ea4a6..ba702b4921f098 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -2910,190 +2910,24 @@ define i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_16:
; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 5(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 6(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 7(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: slli a3, a3, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a6, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 5(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a3, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 6(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 7(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a5, a6, a5
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a7, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a4, a5
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 8(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 9(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: xor a3, a3, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 10(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 11(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a5, a6, a5
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a7, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 8(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 9(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a4, a5
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 10(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu t0, 11(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a6, a7, a6
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli t0, t0, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a5, t0, a5
-; CHECK-ALIGNED-RV32-V-NEXT: or a5, a5, a6
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 12(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 13(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: xor a4, a4, a5
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 14(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 15(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a6, a7, a6
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a0, a0, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a5
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 12(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 13(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a6
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 14(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 15(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a5, a7, a5
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a1, a1, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a6
-; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a5
-; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a4, a0
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0
-; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT: vmseq.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT: vmnot.m v8, v8
+; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8
+; CHECK-ALIGNED-RV32-V-NEXT: seqz a0, a0
; CHECK-ALIGNED-RV32-V-NEXT: ret
;
; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_16:
; CHECK-ALIGNED-RV64-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 4(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 5(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 6(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 7(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 32
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 0(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: or a2, a3, a2
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 2(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a6, a3
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 4(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 5(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a3, a4
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 6(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a7, 7(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a5, a6, a5
-; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a7, a7, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a4, a7, a4
-; CHECK-ALIGNED-RV64-V-NEXT: or a4, a4, a5
-; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 32
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 8(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 9(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 10(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 11(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a6, a3
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 12(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 13(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a3, a4
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 14(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a0, 15(a0)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a5, a6, a5
-; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a0, a0, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a4
-; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a5
-; CHECK-ALIGNED-RV64-V-NEXT: slli a0, a0, 32
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 8(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 9(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a3
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 10(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 11(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a6, a3
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 12(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 13(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: or a3, a3, a4
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 14(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: lbu a1, 15(a1)
-; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 8
-; CHECK-ALIGNED-RV64-V-NEXT: or a5, a6, a5
-; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV64-V-NEXT: slli a1, a1, 24
-; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a4
-; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a5
-; CHECK-ALIGNED-RV64-V-NEXT: slli a1, a1, 32
-; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a3
-; CHECK-ALIGNED-RV64-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV64-V-NEXT: or a0, a2, a0
-; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0
+; CHECK-ALIGNED-RV64-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ALIGNED-RV64-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV64-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV64-V-NEXT: vmseq.vv v8, v8, v9
+; CHECK-ALIGNED-RV64-V-NEXT: vmnot.m v8, v8
+; CHECK-ALIGNED-RV64-V-NEXT: vcpop.m a0, v8
+; CHECK-ALIGNED-RV64-V-NEXT: seqz a0, a0
; CHECK-ALIGNED-RV64-V-NEXT: ret
;
; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_16:
@@ -3194,34 +3028,24 @@ define i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_16:
; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 8(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 12(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 8(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 12(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, a5
-; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, a6
-; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, a7
-; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a4, a0
-; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT: vmseq.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT: vmnot.m v8, v8
+; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT: seqz a0, a0
; CHECK-UNALIGNED-RV32-V-NEXT: ret
;
; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_16:
; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 8(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 8(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3
-; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: vmseq.vv v8, v8, v9
+; CHECK-UNALIGNED-RV64-V-NEXT: vmnot.m v8, v8
+; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v8
+; CHECK-UNALIGNED-RV64-V-NEXT: seqz a0, a0
; CHECK-UNALIGNED-RV64-V-NEXT: ret
entry:
%bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 16)
@@ -3229,15 +3053,15 @@ entry:
}
define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
-; CHECK-RV32-LABEL: bcmp_size_31:
-; CHECK-RV32: # %bb.0: # %entry
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: li a2, 31
-; CHECK-RV32-NEXT: call bcmp
-; CHECK-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: addi sp, sp, 16
-; CHECK-RV32-NEXT: ret
+; CHECK-ALIGNED-RV32-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV32: # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, -16
+; CHECK-ALIGNED-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-ALIGNED-RV32-NEXT: li a2, 31
+; CHECK-ALIGNED-RV32-NEXT: call bcmp
+; CHECK-ALIGNED-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, 16
+; CHECK-ALIGNED-RV32-NEXT: ret
;
; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31:
; CHECK-ALIGNED-RV64: # %bb.0: # %entry
@@ -3249,6 +3073,16 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-ALIGNED-RV64-NEXT: addi sp, sp, 16
; CHECK-ALIGNED-RV64-NEXT: ret
;
+; CHECK-ALIGNED-RV32-ZBB-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV32-ZBB: # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBB-NEXT: addi sp, sp, -16
+; CHECK-ALIGNED-RV32-ZBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-ALIGNED-RV32-ZBB-NEXT: li a2, 31
+; CHECK-ALIGNED-RV32-ZBB-NEXT: call bcmp
+; CHECK-ALIGNED-RV32-ZBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-ALIGNED-RV32-ZBB-NEXT: addi sp, sp, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT: ret
+;
; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_31:
; CHECK-ALIGNED-RV64-ZBB: # %bb.0: # %entry
; CHECK...
[truncated]
``````````
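For readers skimming the truncated diff: the `RISCVISelLowering.cpp` hunk adds `isOrXorXorTree()`/`emitOrXorXorTree()`, which recognize an OR-tree whose leaves are all XORs, the shape a chunked memcmp-against-zero expansion takes in the DAG. The following is a minimal stand-alone C++ sketch of that recursion on a hypothetical `Node` type (not LLVM's `SDValue`), just to make the accepted shape concrete:

```cpp
#include <cassert>

// Hypothetical stand-ins for SDValue/ISD opcodes; only the tree shape matters.
enum class Kind { Or, Xor, Other };

struct Node {
  Kind K;
  const Node *L = nullptr, *R = nullptr; // children, null for leaves
};

// Mirrors the recursion in the patch: interior nodes must be ORs, every leaf
// must be an XOR, and a bare XOR at the root is rejected.
static bool isOrXorXorTree(const Node &N, bool Root = true) {
  if (N.K == Kind::Or)
    return isOrXorXorTree(*N.L, /*Root=*/false) &&
           isOrXorXorTree(*N.R, /*Root=*/false);
  if (Root)
    return false;
  return N.K == Kind::Xor;
}

int main() {
  Node A{Kind::Xor}, B{Kind::Xor}, C{Kind::Other};
  Node Good{Kind::Or, &A, &B}; // (or (xor ...) (xor ...)): recognized
  Node Bad{Kind::Or, &A, &C};  // one leaf is not an XOR: rejected
  assert(isOrXorXorTree(Good));
  assert(!isOrXorXorTree(Bad));
  assert(!isOrXorXorTree(A)); // the root itself must be an OR
  return 0;
}
```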
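The `RISCVTargetTransformInfo.cpp` hunk prepends whole-vector widths to the load sizes used for zero-equality compares. A rough stand-alone illustration of the resulting list, assuming `getRealMinVLen()` returns 128 (a 16-byte minimum vector register):

```cpp
#include <cstdio>
#include <vector>

int main() {
  const unsigned RealMinVLenBits = 128;           // assumed example VLEN
  std::vector<unsigned> LoadSizes = {8, 4, 2, 1}; // existing RV64 scalar sizes
  // Same loop as the patch: prepend VLEN/8 * LMUL for LMUL = 1, 2, 4, 8.
  unsigned RealMinVLenBytes = RealMinVLenBits / 8;
  for (int LMUL = 1; LMUL <= 8; LMUL *= 2)
    LoadSizes.insert(LoadSizes.begin(), RealMinVLenBytes * LMUL);
  // Prints: 128 64 32 16 8 4 2 1 -- the expansion pass tries the largest
  // size first, so a 16-byte bcmp is covered by a single 16-byte load pair.
  for (unsigned Size : LoadSizes)
    std::printf("%u ", Size);
  std::printf("\n");
  return 0;
}
```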
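The test updates exercise `@bcmp` calls directly; in source terms, the affected pattern is a fixed-size, equality-only comparison like the hypothetical helper below (only the zero/non-zero result is used, which is the `IsZeroCmp` case of `enableMemCmpExpansion`):

```cpp
#include <cstring>

// Hypothetical example caller: a 16-byte, equality-only comparison. With the
// V extension this can now lower to vle8.v / vmseq.vv / vcpop.m, as in the
// updated CHECK lines above, instead of a chain of scalar loads and xors.
bool equal16(const void *a, const void *b) {
  return std::memcmp(a, b, 16) == 0;
}
```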
https://github.com/llvm/llvm-project/pull/114517