[llvm-branch-commits] [RISCV] Support non-power-of-2 types when expanding memcmp (PR #114971)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Nov 5 03:58:42 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Pengcheng Wang (wangpc-pp)
Changes:
We can convert non-power-of-2 types into extended value types, which will then be widened.
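For context, a minimal sketch of the distinction this relies on (a hypothetical helper, not part of the patch): `MVT::getVectorVT` only covers simple machine value types, while `EVT::getVectorVT` accepts any element count and leaves it to the type legalizer to widen the result.

```cpp
// Hypothetical illustration, assuming LLVM's ValueTypes API.
// A count like 5 has no simple MVT vector type (v5i8), but it is a valid
// extended value type; the widening legalizer rounds it up (e.g. to v8i8)
// when the target cannot operate on v5i8 directly.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"

llvm::EVT makeByteVectorVT(llvm::LLVMContext &Ctx, unsigned NumBytes) {
  return llvm::EVT::getVectorVT(Ctx, llvm::MVT::i8, NumBytes);
}
```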
---
Patch is 253.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114971.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2-6)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+4-14)
- (modified) llvm/test/CodeGen/RISCV/memcmp-optsize.ll (+462-760)
- (modified) llvm/test/CodeGen/RISCV/memcmp.ll (+460-3282)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 72d85491a6f77d..e67515c24e8341 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14492,10 +14492,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
return SDValue();
unsigned OpSize = OpVT.getSizeInBits();
- // TODO: Support non-power-of-2 types.
- if (!isPowerOf2_32(OpSize))
- return SDValue();
-
// The size should be larger than XLen and smaller than the maximum vector
// size.
if (OpSize <= Subtarget.getXLen() ||
@@ -14517,8 +14513,8 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
return SDValue();
unsigned VecSize = OpSize / 8;
- EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize);
- EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
+ EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 637b670cf041a5..c65feb9755633c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2507,20 +2507,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
else
Options.LoadSizes = {4, 2, 1};
if (IsZeroCmp && ST->hasVInstructions()) {
- unsigned RealMinVLen = ST->getRealMinVLen();
- // Support Fractional LMULs if the lengths are larger than XLen.
- // TODO: Support non-power-of-2 types.
- for (unsigned LMUL = 8; LMUL >= 2; LMUL /= 2) {
- unsigned Len = RealMinVLen / LMUL;
- if (Len > ST->getXLen())
- Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
- }
- for (unsigned LMUL = 1; LMUL <= ST->getMaxLMULForFixedLengthVectors();
- LMUL *= 2) {
- unsigned Len = RealMinVLen * LMUL;
- if (Len > ST->getXLen())
- Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
- }
+ unsigned VLenB = ST->getRealMinVLen() / 8;
+ for (unsigned Size = ST->getXLen() / 8 + 1;
+ Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++)
+ Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
}
return Options;
}
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index b39e6a425d702f..800b5a80fdf160 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -739,31 +739,14 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_5:
; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-ALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV32-V-NEXT: ret
;
@@ -797,17 +780,102 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV64-V-NEXT: ret
;
-; CHECK-UNALIGNED-LABEL: bcmp_size_5:
-; CHECK-UNALIGNED: # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT: lbu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT: lbu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT: xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-NEXT: snez a0, a0
-; CHECK-UNALIGNED-NEXT: ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: ret
entry:
%bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5)
ret i32 %bcmp
@@ -1020,37 +1088,14 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_6:
; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 5(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 5(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a0, a0, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a4
-; CHECK-ALIGNED-RV32-V-NEXT: slli a1, a1, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a5
-; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-ALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV32-V-NEXT: ret
;
@@ -1090,17 +1135,102 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV64-V-NEXT: ret
;
-; CHECK-UNALIGNED-LABEL: bcmp_size_6:
-; CHECK-UNALIGNED: # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT: lhu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT: lhu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT: xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-NEXT: snez a0, a0
-; CHECK-UNALIGNED-NEXT: ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT: ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT: ret
entry:
%bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6)
ret i32 %bcmp
@@ -1337,41 +1467,14 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_7:
; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 5(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 6(a0)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 5(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 6(a1)
-; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 8
-; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a5
-; CHECK-ALIGNED-RV32-V-NEXT: xor a3, a3, a4
-; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a3, a0
-; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vmset.m v0
+; CHECK-ALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8, v0.t
; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV32-V-NEXT: ret
;
@@ -1415,17 +1518,102 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0
; CHECK-ALIGNED-RV64-V-NEXT: ret
;
-; CHECK-UNALIGNED-LABEL: bcmp_size_7:
-; CHECK-UNALIGNED: # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT: lw a0, 3(a0)
-; CHECK-UNALIGNED-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT: lw a1, 3(a1)
-; CHECK-UNALIGNED-NEXT: xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT: xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT: or a0, a2, a0
-; CHECK-UNALIGNED-NEXT: snez a0, a0
-; CHECK-UNALIGNED-NEXT: ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32: # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT: lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0
+; C...
[truncated]
``````````
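To make the RISCVTargetTransformInfo change concrete, here is a standalone sketch of the load-size set the rewritten `enableMemCmpExpansion` hook now produces. The subtarget values are assumptions for illustration; in the real code they come from `RISCVSubtarget` (`getXLen`, `getRealMinVLen`, `getMaxLMULForFixedLengthVectors`).

```cpp
// Sketch of the load sizes produced after this patch, with assumed values
// XLen = 64, RealMinVLen = 128, MaxLMUL = 8. The vector path now offers
// every byte size from XLen/8 + 1 = 9 up to VLenB * MaxLMUL = 128, instead
// of only the power-of-2 sizes the old LMUL loops generated.
#include <cstdio>
#include <vector>

int main() {
  unsigned XLen = 64, RealMinVLen = 128, MaxLMUL = 8; // assumed subtarget
  unsigned VLenB = RealMinVLen / 8;
  std::vector<unsigned> LoadSizes = {8, 4, 2, 1};     // scalar sizes (RV64)
  for (unsigned Size = XLen / 8 + 1; Size <= VLenB * MaxLMUL; ++Size)
    LoadSizes.insert(LoadSizes.begin(), Size);        // keep descending order
  for (unsigned S : LoadSizes)
    std::printf("%u ", S); // prints: 128 127 ... 9 8 4 2 1
  return 0;
}
```

This is why sizes such as 5, 6, and 7 bytes in the tests below now take the vector path (`vle8.v` + `vmsne.vv` + `vcpop.m`) instead of being expanded into scalar byte loads.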
https://github.com/llvm/llvm-project/pull/114971