[clang] [llvm] [RISC-V] Add support for MIPS P8700 CPU (PR #117865)
Djordje Todorovic via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 27 03:55:06 PST 2024
https://github.com/djtodoro updated https://github.com/llvm/llvm-project/pull/117865
>From a3ed3550aed41e102bf41ec347f19f9a3c339b2b Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Tue, 8 Mar 2022 18:53:42 +0100
Subject: [PATCH 1/6] [RISCV] Add MIPS p8700 scheduler and target cpu
Add support for MIPS's p8700 CPU.
Co-authored-by: Nemanja Rakovic <nemanja.rakovic at htecgroup.com>
---
.../test/Misc/target-invalid-cpu-note/riscv.c | 2 +
llvm/lib/Target/RISCV/RISCV.td | 1 +
llvm/lib/Target/RISCV/RISCVFeatures.td | 4 +
llvm/lib/Target/RISCV/RISCVProcessors.td | 14 +-
llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td | 279 ++++++++++++++++++
llvm/lib/Target/RISCV/RISCVSchedule.td | 1 +
6 files changed, 300 insertions(+), 1 deletion(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td
diff --git a/clang/test/Misc/target-invalid-cpu-note/riscv.c b/clang/test/Misc/target-invalid-cpu-note/riscv.c
index 8c5df5884cd791..fc8536d99cb804 100644
--- a/clang/test/Misc/target-invalid-cpu-note/riscv.c
+++ b/clang/test/Misc/target-invalid-cpu-note/riscv.c
@@ -25,6 +25,7 @@
// RISCV64: error: unknown target CPU 'not-a-cpu'
// RISCV64-NEXT: note: valid target CPU values are:
// RISCV64-SAME: {{^}} generic-rv64
+// RISCV64-SAME: {{^}}, mips-p8700
// RISCV64-SAME: {{^}}, rocket-rv64
// RISCV64-SAME: {{^}}, sifive-p450
// RISCV64-SAME: {{^}}, sifive-p470
@@ -72,6 +73,7 @@
// TUNE-RISCV64: error: unknown target CPU 'not-a-cpu'
// TUNE-RISCV64-NEXT: note: valid target CPU values are:
// TUNE-RISCV64-SAME: {{^}} generic-rv64
+// TUNE-RISCV64-SAME: {{^}}, mips-p8700
// TUNE-RISCV64-SAME: {{^}}, rocket-rv64
// TUNE-RISCV64-SAME: {{^}}, sifive-p450
// TUNE-RISCV64-SAME: {{^}}, sifive-p470
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 00c3d702e12a22..1df6f9ae1944c8 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -46,6 +46,7 @@ include "RISCVMacroFusion.td"
// RISC-V Scheduling Models
//===----------------------------------------------------------------------===//
+include "RISCVSchedMIPSP8700.td"
include "RISCVSchedRocket.td"
include "RISCVSchedSiFive7.td"
include "RISCVSchedSiFiveP400.td"
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 26e96cf831af76..3380f2e5c29acb 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1447,6 +1447,10 @@ def TuneConditionalCompressedMoveFusion
def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">;
def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">;
+def TuneMIPSP8700
+ : SubtargetFeature<"mips-p8700", "RISCVProcFamily", "Others",
+ "MIPS p8700 processor">;
+
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors">;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 03a48ff3c17586..cae4fdd88d558f 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -78,6 +78,19 @@ def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64",
// to change to the appropriate rv32/rv64 version.
def GENERIC : RISCVTuneProcessorModel<"generic", NoSchedModel>, GenericTuneInfo;
+def MIPS_P8700 : RISCVProcessorModel<"mips-p8700",
+ MIPSP8700Model,
+ [Feature64Bit,
+ FeatureStdExtI,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtZba,
+ FeatureStdExtZbb],
+ [TuneMIPSP8700]>;
+
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
RocketModel,
[Feature32Bit,
@@ -279,7 +292,6 @@ def SIFIVE_P470 : RISCVProcessorModel<"sifive-p470", SiFiveP400Model,
!listconcat(SiFiveP400TuneFeatures,
[TuneNoSinkSplatOperands])>;
-
def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
!listconcat(RVA22U64Features,
[FeatureStdExtV,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td
new file mode 100644
index 00000000000000..75f015d97df198
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td
@@ -0,0 +1,279 @@
+//===-- RISCVSchedMIPSP8700.td - MIPS RISC-V Processor -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// RISC-V processor by MIPS.
+//===----------------------------------------------------------------------===//
+
+def MIPSP8700Model : SchedMachineModel {
+ int IssueWidth = 4;
+ int MicroOpBufferSize = 96; // as per the specification
+ int LoadLatency = 4;
+ int MispredictPenalty = 8; // TODO: Estimated
+ let CompleteModel = 0;
+}
+
+let SchedModel = MIPSP8700Model in {
+
+// Handle ALQ Pipelines.
+def p8700ALQ : ProcResource<1> { let BufferSize = 16; }
+def p8700IssueALU : ProcResource<1> { let Super = p8700ALQ; }
+
+
+// Handle AGQ Pipelines.
+def p8700AGQ : ProcResource<3> { let BufferSize = 16; }
+def p8700IssueAL2 : ProcResource<1> { let Super = p8700AGQ; }
+def p8700IssueCTISTD : ProcResource<1> { let Super = p8700AGQ; }
+def p8700IssueLDST : ProcResource<1> { let Super = p8700AGQ; }
+def p8700GpDiv : ProcResource<1>;
+def p8700GpMul : ProcResource<1>;
+def p8700WriteEitherALU : ProcResGroup<[p8700IssueALU, p8700IssueAL2]>;
+
+let Latency = 1 in {
+def : WriteRes<WriteIALU, [p8700WriteEitherALU]>;
+def : WriteRes<WriteIALU32, [p8700WriteEitherALU]>;
+def : WriteRes<WriteShiftImm, [p8700WriteEitherALU]>;
+def : WriteRes<WriteShiftImm32, [p8700WriteEitherALU]>;
+def : WriteRes<WriteShiftReg, [p8700WriteEitherALU]>;
+def : WriteRes<WriteShiftReg32, [p8700WriteEitherALU]>;
+
+// Handle zba.
+def : WriteRes<WriteSHXADD, [p8700WriteEitherALU]>;
+def : WriteRes<WriteSHXADD32, [p8700WriteEitherALU]>;
+
+// Handle zbb.
+def : WriteRes<WriteRotateReg, [p8700WriteEitherALU]>;
+def : WriteRes<WriteRotateImm, [p8700WriteEitherALU]>;
+def : WriteRes<WriteCLZ, [p8700WriteEitherALU]>;
+def : WriteRes<WriteCTZ, [p8700WriteEitherALU]>;
+def : WriteRes<WriteCPOP, [p8700WriteEitherALU]>;
+def : WriteRes<WriteRotateReg32, [p8700WriteEitherALU]>;
+def : WriteRes<WriteRotateImm32, [p8700WriteEitherALU]>;
+def : WriteRes<WriteCLZ32, [p8700WriteEitherALU]>;
+def : WriteRes<WriteCTZ32, [p8700WriteEitherALU]>;
+def : WriteRes<WriteCPOP32, [p8700WriteEitherALU]>;
+def : WriteRes<WriteREV8, [p8700WriteEitherALU]>;
+def : WriteRes<WriteORCB, [p8700WriteEitherALU]>;
+def : WriteRes<WriteIMinMax, []>;
+}
+
+let Latency = 0 in {
+def : WriteRes<WriteNop, [p8700WriteEitherALU]>;
+}
+
+let Latency = 4 in {
+def : WriteRes<WriteLDB, [p8700IssueLDST]>;
+def : WriteRes<WriteLDH, [p8700IssueLDST]>;
+def : WriteRes<WriteLDW, [p8700IssueLDST]>;
+def : WriteRes<WriteLDD, [p8700IssueLDST]>;
+
+def : WriteRes<WriteAtomicW, [p8700IssueLDST]>;
+def : WriteRes<WriteAtomicD, [p8700IssueLDST]>;
+def : WriteRes<WriteAtomicLDW, [p8700IssueLDST]>;
+def : WriteRes<WriteAtomicLDD, [p8700IssueLDST]>;
+}
+
+let Latency = 8 in {
+def : WriteRes<WriteFLD32, [p8700IssueLDST]>;
+def : WriteRes<WriteFLD64, [p8700IssueLDST]>;
+}
+
+let Latency = 3 in {
+def : WriteRes<WriteSTB, [p8700IssueLDST]>;
+def : WriteRes<WriteSTH, [p8700IssueLDST]>;
+def : WriteRes<WriteSTW, [p8700IssueLDST]>;
+def : WriteRes<WriteSTD, [p8700IssueLDST]>;
+
+def : WriteRes<WriteAtomicSTW, [p8700IssueLDST]>;
+def : WriteRes<WriteAtomicSTD, [p8700IssueLDST]>;
+}
+
+let Latency = 1 in {
+def : WriteRes<WriteFST32, [p8700IssueLDST]>;
+def : WriteRes<WriteFST64, [p8700IssueLDST]>;
+}
+
+let Latency = 7 in {
+def : WriteRes<WriteFMovI32ToF32, [p8700IssueLDST]>;
+def : WriteRes<WriteFMovF32ToI32, [p8700IssueLDST]>;
+def : WriteRes<WriteFMovI64ToF64, [p8700IssueLDST]>;
+def : WriteRes<WriteFMovF64ToI64, [p8700IssueLDST]>;
+}
+
+let Latency = 4 in {
+def : WriteRes<WriteIMul, [p8700GpMul]>;
+def : WriteRes<WriteIMul32, [p8700GpMul]>;
+}
+
+let Latency = 8, ReleaseAtCycles = [5] in {
+def : WriteRes<WriteIDiv, [p8700GpDiv]>;
+def : WriteRes<WriteIDiv32, [p8700GpDiv]>;
+}
+
+def : WriteRes<WriteIRem, []>;
+def : WriteRes<WriteIRem32, []>;
+
+// Handle CTISTD Pipeline.
+let Latency = 1 in {
+def : WriteRes<WriteJmp, [p8700IssueCTISTD]>;
+def : WriteRes<WriteJmpReg, [p8700IssueCTISTD]>;
+}
+
+let Latency = 2 in {
+def : WriteRes<WriteJal, [p8700IssueCTISTD]>;
+def : WriteRes<WriteJalr, [p8700IssueCTISTD]>;
+}
+
+// Handle FPU Pipelines.
+def p8700FPQ : ProcResource<3> { let BufferSize = 16; }
+def p8700IssueFPUS : ProcResource<1> { let Super = p8700FPQ; }
+def p8700IssueFPUL : ProcResource<1> { let Super = p8700FPQ; }
+def p8700IssueFPULoad : ProcResource<1> { let Super = p8700FPQ; }
+def p8700FpuApu : ProcResource<1>;
+def p8700FpuLong : ProcResource<1>;
+
+let Latency = 4, ReleaseAtCycles = [1, 1] in {
+def : WriteRes<WriteFCvtI32ToF32, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtI32ToF64, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtI64ToF32, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtI64ToF64, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtF32ToI32, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtF32ToI64, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtF32ToF64, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtF64ToI32, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtF64ToI64, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFCvtF64ToF32, [p8700IssueFPUL, p8700FpuApu]>;
+
+def : WriteRes<WriteFAdd32, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFAdd64, [p8700IssueFPUL, p8700FpuApu]>;
+}
+
+let Latency = 2, ReleaseAtCycles = [1, 1] in {
+def : WriteRes<WriteFSGNJ32, [p8700IssueFPUS, p8700FpuApu]>;
+def : WriteRes<WriteFMinMax32, [p8700IssueFPUS, p8700FpuApu]>;
+def : WriteRes<WriteFSGNJ64, [p8700IssueFPUS, p8700FpuApu]>;
+def : WriteRes<WriteFMinMax64, [p8700IssueFPUS, p8700FpuApu]>;
+
+def : WriteRes<WriteFCmp32, [p8700IssueFPUS, p8700FpuApu]>;
+def : WriteRes<WriteFCmp64, [p8700IssueFPUS, p8700FpuApu]>;
+def : WriteRes<WriteFClass32, [p8700IssueFPUS, p8700FpuApu]>;
+def : WriteRes<WriteFClass64, [p8700IssueFPUS, p8700FpuApu]>;
+}
+
+let Latency = 8, ReleaseAtCycles = [1, 1] in {
+def : WriteRes<WriteFMA32, [p8700FpuLong, p8700FpuApu]>;
+def : WriteRes<WriteFMA64, [p8700FpuLong, p8700FpuApu]>;
+}
+
+let Latency = 5, ReleaseAtCycles = [1, 1] in {
+def : WriteRes<WriteFMul32, [p8700FpuLong, p8700FpuApu]>;
+def : WriteRes<WriteFMul64, [p8700FpuLong, p8700FpuApu]>;
+}
+
+let Latency = 17, ReleaseAtCycles = [1, 17] in {
+def : WriteRes<WriteFDiv32, [p8700FpuLong, p8700FpuApu]>;
+def : WriteRes<WriteFSqrt32, [p8700FpuLong, p8700FpuApu]>;
+
+def : WriteRes<WriteFDiv64, [p8700IssueFPUL, p8700FpuApu]>;
+def : WriteRes<WriteFSqrt64, [p8700IssueFPUL, p8700FpuApu]>;
+}
+
+def : WriteRes<WriteCSR, [p8700ALQ]>;
+
+// Bypass and advance.
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShiftImm, 0>;
+def : ReadAdvance<ReadShiftImm32, 0>;
+def : ReadAdvance<ReadShiftReg, 0>;
+def : ReadAdvance<ReadShiftReg32, 0>;
+def : ReadAdvance<ReadSHXADD, 0>;
+def : ReadAdvance<ReadSHXADD32, 0>;
+def : ReadAdvance<ReadRotateReg, 0>;
+def : ReadAdvance<ReadRotateImm, 0>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadRotateReg32, 0>;
+def : ReadAdvance<ReadRotateImm32, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : ReadAdvance<ReadREV8, 0>;
+def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
+def : ReadAdvance<ReadIRem, 0>;
+def : ReadAdvance<ReadIRem32, 0>;
+
+// Unsupported extensions.
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedZbc;
+defm : UnsupportedSchedZbs;
+defm : UnsupportedSchedZbkb;
+defm : UnsupportedSchedZbkx;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedZabha;
+defm : UnsupportedSchedXsfvcp;
+defm : UnsupportedSchedZvk;
+defm : UnsupportedSchedZvkned;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 1fdbc7cbcbaf4a..114261b0b7680a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -22,6 +22,7 @@ def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I
def WriteJmp : SchedWrite; // Jump
def WriteJal : SchedWrite; // Jump and link
def WriteJalr : SchedWrite; // Jump and link register
+def WriteJmpReg : SchedWrite; // Jump register
def WriteNop : SchedWrite;
def WriteLDB : SchedWrite; // Load byte
def WriteLDH : SchedWrite; // Load half-word
>From 9b0b29c8092604499498c29f2615f8510ec04b33 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Fri, 11 Oct 2024 14:27:35 +0200
Subject: [PATCH 2/6] [RISCV] Add MIPS extensions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Introduce MIPS extensions by adding:
1) ccmov (+xmipscmov)
2) load/store pairs (+xmipslsp)
The -mload-store-pairs option implies l[w|d]p and s[w|d]p usage.
The `ccmov` feature is ON by default, so we introduce
an option for disabling the feature - `-mno-ccmov`.
Co-authored-by: Dragan Mladjenovic <Dragan.Mladjenovic at syrmia.com>
Co-authored-by: Jovan Dmitrović <jovan.dmitrovic at htecgroup.com>
---
clang/include/clang/Driver/Options.td | 4 +
clang/lib/Driver/ToolChains/Clang.cpp | 14 +
llvm/docs/RISCVUsage.rst | 6 +
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 10 +
llvm/lib/Target/RISCV/CMakeLists.txt | 1 +
.../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1 +
llvm/lib/Target/RISCV/RISCV.h | 2 +
llvm/lib/Target/RISCV/RISCVFeatures.td | 13 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +-
llvm/lib/Target/RISCV/RISCVInstrFormats.td | 72 +++
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 42 ++
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 9 +
llvm/lib/Target/RISCV/RISCVInstrInfo.td | 220 ++++++++
llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 125 -----
.../Target/RISCV/RISCVLoadStoreOptimizer.cpp | 371 +++++++++++++
llvm/lib/Target/RISCV/RISCVProcessors.td | 4 +-
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 17 +
llvm/lib/Target/RISCV/RISCVSubtarget.h | 2 +
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 20 +
llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll | 28 +-
llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 +
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 2 +
llvm/test/CodeGen/RISCV/load-store-pair.ll | 509 ++++++++++++++++++
llvm/test/CodeGen/RISCV/rv64zba.ll | 2 +-
llvm/test/CodeGen/RISCV/select-and.ll | 25 +
llvm/test/CodeGen/RISCV/select-bare.ll | 14 +
llvm/test/CodeGen/RISCV/select-cc.ll | 86 +++
llvm/test/CodeGen/RISCV/select-or.ll | 25 +
28 files changed, 1487 insertions(+), 142 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
create mode 100644 llvm/test/CodeGen/RISCV/load-store-pair.ll
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 808f089914c9bb..5527be23aa3acd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4937,6 +4937,10 @@ def msave_restore : Flag<["-"], "msave-restore">, Group<m_riscv_Features_Group>,
def mno_save_restore : Flag<["-"], "mno-save-restore">, Group<m_riscv_Features_Group>,
HelpText<"Disable using library calls for save and restore">;
} // let Flags = [TargetSpecific]
+def mload_store_pairs : Flag<["-"], "mload-store-pairs">, Group<m_riscv_Features_Group>;
+def mno_load_store_pairs : Flag<["-"], "mno-load-store-pairs">, Group<m_riscv_Features_Group>;
+def mccmov : Flag<["-"], "mccmov">, Group<m_riscv_Features_Group>;
+def mno_ccmov : Flag<["-"], "mno-ccmov">, Group<m_riscv_Features_Group>;
let Flags = [TargetSpecific] in {
def menable_experimental_extensions : Flag<["-"], "menable-experimental-extensions">, Group<m_Group>,
HelpText<"Enable use of experimental RISC-V extensions.">;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 217c1a845f0a47..56acc93a3ed9d7 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2164,6 +2164,20 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args,
CmdArgs.push_back(A->getValue());
}
+ if (Arg *A = Args.getLastArg(options::OPT_mload_store_pairs,
+ options::OPT_mno_load_store_pairs)) {
+ if (A->getOption().matches(options::OPT_mload_store_pairs)) {
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("-riscv-load-store-pairs=1");
+ }
+ }
+
+ if (Arg *A = Args.getLastArg(options::OPT_mccmov, options::OPT_mno_ccmov)) {
+ if (A->getOption().matches(options::OPT_mno_ccmov)) {
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("-riscv-ccmov=0");
+ }
+ }
// Handle -mrvv-vector-bits=<bits>
if (Arg *A = Args.getLastArg(options::OPT_mrvv_vector_bits_EQ)) {
StringRef Val = A->getValue();
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index bac267591e0152..ad4e7bca11b653 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -426,6 +426,12 @@ The current vendor extensions supported are:
``Xwchc``
LLVM implements `the custom compressed opcodes present in some QingKe cores` by WCH / Nanjing Qinheng Microelectronics. The vendor refers to these opcodes by the name "XW".
+``xmipscmov``
+ LLVM implements conditional move for the `p8700 processor <https://mips.com/products/hardware/p8700/>` by MIPS.
+
+``xmipslsp``
+ LLVM implements load/store pair instructions for the `p8700 processor <https://mips.com/products/hardware/p8700/>` by MIPS.
+
Experimental C Intrinsics
=========================
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index b843bb5ae43100..4edeb07a0fc980 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -855,6 +855,16 @@ struct RISCVOperand final : public MCParsedAsmOperand {
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isUImm7Lsb000() const {
+ if (!isImm())
+ return false;
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isShiftedUInt<4, 3>(Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isUImm8Lsb00() const {
if (!isImm())
return false;
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index fd049d1a57860e..5a353a1c690b9e 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(RISCVCodeGen
RISCVISelLowering.cpp
RISCVLandingPadSetup.cpp
RISCVMachineFunctionInfo.cpp
+ RISCVLoadStoreOptimizer.cpp
RISCVMergeBaseOffset.cpp
RISCVOptWInstrs.cpp
RISCVPostRAExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index ca2f868cd4e764..79889fba75e809 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -306,6 +306,7 @@ enum OperandType : unsigned {
OPERAND_UIMM6_LSB0,
OPERAND_UIMM7,
OPERAND_UIMM7_LSB00,
+ OPERAND_UIMM7_LSB000,
OPERAND_UIMM8_LSB00,
OPERAND_UIMM8,
OPERAND_UIMM8_LSB000,
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index d7bab601d545cc..b1aee98739e852 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -84,6 +84,8 @@ void initializeRISCVMoveMergePass(PassRegistry &);
FunctionPass *createRISCVPushPopOptimizationPass();
void initializeRISCVPushPopOptPass(PassRegistry &);
+FunctionPass *createRISCVLoadStoreOptPass();
+void initializeRISCVLoadStoreOptPass(PassRegistry &);
FunctionPass *createRISCVZacasABIFixPass();
void initializeRISCVZacasABIFixPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 3380f2e5c29acb..6ede46d6fa31a3 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1450,6 +1450,19 @@ def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()
def TuneMIPSP8700
: SubtargetFeature<"mips-p8700", "RISCVProcFamily", "Others",
"MIPS p8700 processor">;
+def FeatureMIPSCMov : SubtargetFeature<"xmipscmov", "HasMIPSCMov",
+ "true", "Using CCMov",
+ [Feature64Bit]>;
+def UsesMIPSCMov
+ : Predicate<"Subtarget->useCCMovInsn()">,
+ AssemblerPredicate<(all_of FeatureMIPSCMov), "'ccmov' instruction">;
+def FeatureMIPSLoadStorePairs
+ : SubtargetFeature<"xmipslsp", "HasMIPSLSP", "true",
+ "Optimize for hardware load-store bonding">;
+def UsesMIPSLoadStorePairs
+ : Predicate<"Subtarget->useLoadStorePairs()">,
+ AssemblerPredicate<(all_of FeatureMIPSLoadStorePairs),
+ "load and store pair instructions">;
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 329b42d621ceec..9d013c47b1deb4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -408,7 +408,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i32, Custom);
}
- if (!Subtarget.hasVendorXTHeadCondMov())
+ if (Subtarget.hasMIPSCMov())
+ setOperationAction(ISD::SELECT, XLenVT, Legal);
+ else if (!Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
static const unsigned FPLegalNodeTypes[] = {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 013c26c72bfd55..9ffed2c80ad6d3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -514,6 +514,78 @@ class RVInstJ<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
let Inst{6-0} = opcode.Value;
}
+//===----------------------------------------------------------------------===//
+// MIPS custom instruction formats
+//===----------------------------------------------------------------------===//
+
+// Load double pair format.
+class LDPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs1;
+ bits<5> rd1;
+ bits<5> rd2;
+
+ let Inst{31-27} = rd2;
+ let Inst{26-23} = imm7{6-3};
+ let Inst{22-20} = 0b000;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b100;
+ let Inst{11-7} = rd1;
+ let Inst{6-0} = 0b0001011;
+}
+
+// Load word pair format.
+class LWPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs1;
+ bits<5> rd1;
+ bits<5> rd2;
+
+ let Inst{31-27} = rd2;
+ let Inst{26-22} = imm7{6-2};
+ let Inst{21-20} = 0b01;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b100;
+ let Inst{11-7} = rd1;
+ let Inst{6-0} = 0b0001011;
+}
+
+// Store double pair format.
+class SDPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-27} = rs3;
+ let Inst{26-25} = imm7{6-5};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b101;
+ let Inst{11-10} = imm7{4-3};
+ let Inst{9-0} = 0b0000001011;
+}
+
+// Store word pair format.
+class SWPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-27} = rs3;
+ let Inst{26-25} = imm7{6-5};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b101;
+ let Inst{11-9} = imm7{4-2};
+ let Inst{8-0} = 0b010001011;
+}
+
//===----------------------------------------------------------------------===//
// Instruction classes for .insn directives
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 47273d6bc06d65..ad7abf4f2770ac 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2464,6 +2464,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM7_LSB00:
Ok = isShiftedUInt<5, 2>(Imm);
break;
+ case RISCVOp::OPERAND_UIMM7_LSB000:
+ Ok = isShiftedUInt<4, 3>(Imm);
+ break;
case RISCVOp::OPERAND_UIMM8_LSB00:
Ok = isShiftedUInt<6, 2>(Imm);
break;
@@ -2710,6 +2713,45 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
.setMemRefs(MemI.memoperands())
.setMIFlags(MemI.getFlags());
}
+bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case RISCV::SH:
+ case RISCV::LH:
+ case RISCV::LHU:
+ case RISCV::SW:
+ case RISCV::FSW:
+ case RISCV::LW:
+ case RISCV::FLW:
+ case RISCV::SD:
+ case RISCV::FSD:
+ case RISCV::LD:
+ case RISCV::FLD:
+ return true;
+ }
+}
+
+bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI) {
+ // If this is a volatile load/store, don't mess with it.
+ if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+ return false;
+
+ if (LdSt.getOperand(1).isFI())
+ return true;
+
+ assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
+ // Can't cluster if the instruction modifies the base register
+ // or it is update form. e.g. ld x5,8(x5)
+ if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI))
+ return false;
+
+ if (!LdSt.getOperand(2).isImm())
+ return false;
+
+ return true;
+}
bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 005cba5d35610e..3daa6fdceffaf9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -298,6 +298,15 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+ /// Return true if the given load or store may be paired with another.
+ static bool isPairableLdStInstOpc(unsigned Opc);
+
+ static bool isLdStSafeToPair(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI);
+
+ std::optional<std::pair<unsigned, unsigned>>
+ isRVVSpillForZvlsseg(unsigned Opcode) const;
+
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 5747f05ffafd47..cc4bfa95981ed7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -251,6 +251,147 @@ def simm12 : RISCVSImmLeafOp<12> {
}];
}
+// A 7-bit unsigned immediate where the least significant two bits are zero.
+def uimm7_lsb00 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<7, "Lsb00">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_UIMM7_LSB00";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<5, 2>(Imm);
+ }];
+}
+
+// A 7-bit unsigned immediate where the least significant three bits are zero.
+def uimm7_lsb000 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<4, 3>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<7, "Lsb000">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_UIMM7_LSB000";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<4, 3>(Imm);
+ }];
+}
+
+// A 8-bit unsigned immediate where the least significant two bits are zero.
+def uimm8_lsb00 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<6, 2>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<8, "Lsb00">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_UIMM8_LSB00";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<6, 2>(Imm);
+ }];
+}
+
+// A 8-bit unsigned immediate where the least significant three bits are zero.
+def uimm8_lsb000 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<5, 3>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<8, "Lsb000">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_UIMM8_LSB000";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<5, 3>(Imm);
+ }];
+}
+
+// A 9-bit signed immediate where the least significant bit is zero.
+def simm9_lsb0 : Operand<OtherVT>,
+ ImmLeaf<XLenVT, [{return isShiftedInt<8, 1>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<9, "Lsb0">;
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getImmOpValueAsr1";
+ let DecoderMethod = "decodeSImmOperandAndLsl1<9>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedInt<8, 1>(Imm);
+ return MCOp.isBareSymbolRef();
+
+ }];
+ let OperandType = "OPERAND_PCREL";
+}
+
+// A 9-bit unsigned immediate where the least significant three bits are zero.
+def uimm9_lsb000 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<6, 3>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<9, "Lsb000">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<9>";
+ let OperandType = "OPERAND_UIMM9_LSB000";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<6, 3>(Imm);
+ }];
+}
+
+// A 10-bit unsigned immediate where the least significant two bits are zero
+// and the immediate can't be zero.
+def uimm10_lsb00nonzero : RISCVOp,
+ ImmLeaf<XLenVT,
+ [{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
+ let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmNonZeroOperand<10>";
+ let OperandType = "OPERAND_UIMM10_LSB00_NONZERO";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<8, 2>(Imm) && (Imm != 0);
+ }];
+}
+
+// A 10-bit signed immediate where the least significant four bits are zero.
+def simm10_lsb0000nonzero : RISCVOp,
+ ImmLeaf<XLenVT,
+ [{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmNonZeroOperand<10>";
+ let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedInt<6, 4>(Imm) && (Imm != 0);
+ }];
+}
+
+// A 12-bit signed immediate where the least significant bit is zero.
+def simm12_lsb0 : Operand<XLenVT>,
+ ImmLeaf<XLenVT, [{return isShiftedInt<11, 1>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<12, "Lsb0">;
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getImmOpValueAsr1";
+ let DecoderMethod = "decodeSImmOperandAndLsl1<12>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedInt<11, 1>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+ let OperandType = "OPERAND_PCREL";
+}
+
// A 12-bit signed immediate which cannot fit in 6-bit signed immediate,
// but even negative value fit in 12-bit.
def simm12_no6 : ImmLeaf<XLenVT, [{
@@ -402,6 +543,10 @@ def ixlenimm_li_restricted : Operand<XLenVT> {
// Standalone (codegen-only) immleaf patterns.
+// A 12-bit signed immediate plus one where the imm range will be -2047~2048.
+def simm12_plus1 : ImmLeaf<XLenVT,
+ [{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+
// A 6-bit constant greater than 32.
def uimm6gt32 : ImmLeaf<XLenVT, [{
return isUInt<6>(Imm) && Imm > 32;
@@ -800,6 +945,13 @@ def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">,
} // IsSignExtendingOpW = 1
} // Predicates = [IsRV64]
+// Custom MIPS conditional-move instruction (XMIPSCMov extension), encoded
+// as an R4-type instruction in the CUSTOM_0 opcode space.
+// Per the selection patterns below, $rs2 holds the condition: the result is
+// $rs1 when $rs2 is non-zero, $rs3 otherwise.
+// NOTE(review): the assembly operand order "$rd, $rs2, $rs1, $rs3" differs
+// from the (ins) order -- confirm against the P8700 ISA manual.
+let Predicates = [UsesMIPSCMov], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+def CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, GPR:$rs3),
+ "ccmov", "$rd, $rs2, $rs1, $rs3">,
+ Sched<[]>;
+} // Predicates = [UsesMIPSCMov]
+
//===----------------------------------------------------------------------===//
// Privileged instructions
//===----------------------------------------------------------------------===//
@@ -2017,6 +2169,74 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
}
//===----------------------------------------------------------------------===//
+
+// MIPS extensions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [UsesMIPSCMov] in {
+// Basic form: CCMOV selects $rs1 when the condition register $rs2 is
+// non-zero, and $rs3 when it is zero.
+def : Pat<(select (XLenVT (setne (XLenVT GPR:$rs2), (XLenVT 0))),
+ (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq (XLenVT GPR:$rs2), (XLenVT 0))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+// Compare with a small immediate: materialize (x - y) with ADDI and let
+// CCMOV test the difference for zero.  simm12_plus1 admits exactly those y
+// whose negation still fits ADDI's 12-bit signed immediate (it excludes
+// -2048 and allows 2048).
+def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))),
+ (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>;
+// Register-register (in)equality: x != y  <=>  (x ^ y) != 0.
+def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>;
+// Unsigned x >= y  <=>  !(x <u y): compute SLTU and swap the select arms.
+// (setule y, x is the same condition with the operands commuted.)
+def : Pat<(select (XLenVT (setuge (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setule (XLenVT GPR:$y), (XLenVT GPR:$x))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>;
+// Signed x >= y  <=>  !(x <s y), analogously via SLT.
+def : Pat<(select (XLenVT (setge (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setle (XLenVT GPR:$y), (XLenVT GPR:$x))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
+// Bare i1-in-a-GPR condition.
+def : Pat<(select (XLenVT GPR:$rs2), (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+} // Predicates = [UsesMIPSCMov]
+
+// Paired load/store instructions (XMIPSLoadStorePairs extension).  Loads
+// write two destination registers; stores read two data registers.  The
+// offset operand is an unsigned 7-bit immediate aligned to the access size.
+let Predicates = [UsesMIPSLoadStorePairs], hasSideEffects = 0 in {
+  let mayLoad = 1, mayStore = 0 in {
+    def LWP : LWPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb00:$imm7),
+                        "lwp", "$rd1, $rd2, ${imm7}(${rs1})">,
+              Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
+    def LDP : LDPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb000:$imm7),
+                        "ldp", "$rd1, $rd2, ${imm7}(${rs1})">,
+              Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
+  } // mayLoad = 1, mayStore = 0
+  let mayLoad = 0, mayStore = 1 in {
+    def SWP : SWPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb00:$imm7),
+                        "swp", "$rs2, $rs3, ${imm7}(${rs1})">,
+              Sched<[WriteSTW, ReadStoreData, ReadStoreData, ReadMemBase]>;
+    def SDP : SDPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb000:$imm7),
+                        "sdp", "$rs2, $rs3, ${imm7}(${rs1})">,
+              Sched<[WriteSTD, ReadStoreData, ReadStoreData, ReadMemBase]>;
+  } // mayLoad = 0, mayStore = 1
+} // Predicates = [UsesMIPSLoadStorePairs]
+
+//===----------------------------------------------------------------------===//
+
// Standard extensions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index e5a5f60f9fec10..c297e83f4be2c3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -94,131 +94,6 @@ def c_lui_imm : RISCVOp,
}];
}
-// A 7-bit unsigned immediate where the least significant two bits are zero.
-def uimm7_lsb00 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<7, "Lsb00">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<7>";
- let OperandType = "OPERAND_UIMM7_LSB00";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<5, 2>(Imm);
- }];
-}
-
-// A 8-bit unsigned immediate where the least significant two bits are zero.
-def uimm8_lsb00 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<6, 2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<8, "Lsb00">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<8>";
- let OperandType = "OPERAND_UIMM8_LSB00";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<6, 2>(Imm);
- }];
-}
-
-// A 8-bit unsigned immediate where the least significant three bits are zero.
-def uimm8_lsb000 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<5, 3>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<8, "Lsb000">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<8>";
- let OperandType = "OPERAND_UIMM8_LSB000";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<5, 3>(Imm);
- }];
-}
-
-// A 9-bit signed immediate where the least significant bit is zero.
-def simm9_lsb0 : Operand<OtherVT>,
- ImmLeaf<XLenVT, [{return isShiftedInt<8, 1>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<9, "Lsb0">;
- let PrintMethod = "printBranchOperand";
- let EncoderMethod = "getImmOpValueAsr1";
- let DecoderMethod = "decodeSImmOperandAndLsl1<9>";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (MCOp.evaluateAsConstantImm(Imm))
- return isShiftedInt<8, 1>(Imm);
- return MCOp.isBareSymbolRef();
-
- }];
- let OperandType = "OPERAND_PCREL";
-}
-
-// A 9-bit unsigned immediate where the least significant three bits are zero.
-def uimm9_lsb000 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<6, 3>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<9, "Lsb000">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<9>";
- let OperandType = "OPERAND_UIMM9_LSB000";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<6, 3>(Imm);
- }];
-}
-
-// A 10-bit unsigned immediate where the least significant two bits are zero
-// and the immediate can't be zero.
-def uimm10_lsb00nonzero : RISCVOp,
- ImmLeaf<XLenVT,
- [{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
- let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmNonZeroOperand<10>";
- let OperandType = "OPERAND_UIMM10_LSB00_NONZERO";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<8, 2>(Imm) && (Imm != 0);
- }];
-}
-
-// A 10-bit signed immediate where the least significant four bits are zero.
-def simm10_lsb0000nonzero : RISCVOp,
- ImmLeaf<XLenVT,
- [{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmNonZeroOperand<10>";
- let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedInt<6, 4>(Imm) && (Imm != 0);
- }];
-}
-
-// A 12-bit signed immediate where the least significant bit is zero.
-def simm12_lsb0 : Operand<XLenVT>,
- ImmLeaf<XLenVT, [{return isShiftedInt<11, 1>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<12, "Lsb0">;
- let PrintMethod = "printBranchOperand";
- let EncoderMethod = "getImmOpValueAsr1";
- let DecoderMethod = "decodeSImmOperandAndLsl1<12>";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (MCOp.evaluateAsConstantImm(Imm))
- return isShiftedInt<11, 1>(Imm);
- return MCOp.isBareSymbolRef();
- }];
- let OperandType = "OPERAND_PCREL";
-}
def InsnCDirectiveOpcode : AsmOperandClass {
let Name = "InsnCDirectiveOpcode";
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
new file mode 100644
index 00000000000000..52dc97664d9836
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -0,0 +1,371 @@
+//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bundle loads and stores that operate on consecutive memory locations to take
+// the advantage of hardware load/store bonding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-load-store-opt"
+#define RISCV_LOAD_STORE_OPT_NAME "RISCV Load / Store Optimizer"
+namespace {
+
+// Late machine pass (post-RA, no virtual registers) that finds adjacent
+// loads/stores to consecutive addresses and either merges them into a
+// paired instruction or bundles them for hardware load/store bonding.
+struct RISCVLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ // The pass runs after register allocation and requires physical registers.
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
+
+ // Find and pair load/store instructions.
+ bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
+
+ // Convert load/store pairs to single instructions.
+ bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Second);
+
+ // Scan the instructions looking for a load/store that can be combined
+ // with the current instruction into a load/store pair.
+ // Return the matching instruction if one is found, else MBB->end().
+ MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
+ bool &MergeForward);
+
+ // Move one of (I, Paired) next to the other and merge/bundle them.
+ MachineBasicBlock::iterator
+ mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired, bool MergeForward);
+
+private:
+ AliasAnalysis *AA; // Alias info consulted before reordering memory ops.
+ MachineRegisterInfo *MRI;
+ const RISCVInstrInfo *TII;
+ const RISCVRegisterInfo *TRI;
+ // Register units modified/used between the two candidate pair members.
+ LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ // Cached Subtarget.useLoadStorePairs(); gates emitting real LWP/LDP/SWP/SDP
+ // in tryConvertToLdStPair (otherwise the pair is only bundled).
+ bool UseLoadStorePair = false;
+};
+} // end anonymous namespace
+
+char RISCVLoadStoreOpt::ID = 0;
+INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
+ false)
+
+// Pass entry point: scan every basic block for pairable loads/stores.
+// Only does work when the subtarget reports useLoadStorePairs() (the
+// XMIPSLSP feature plus the -riscv-load-store-pairs option).
+bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+ const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
+
+ if (!Subtarget.useLoadStorePairs())
+ return false;
+
+ bool MadeChange = false;
+ TII = Subtarget.getInstrInfo();
+ TRI = Subtarget.getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ModifiedRegUnits.init(*TRI);
+ UsedRegUnits.init(*TRI);
+ // Always true here given the early return above; cached for use in
+ // tryConvertToLdStPair.
+ UseLoadStorePair = Subtarget.useLoadStorePairs();
+
+ for (MachineBasicBlock &MBB : Fn) {
+ LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+ // On a successful pairing, tryToPairLdStInst advances MBBI itself.
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+ tryToPairLdStInst(MBBI))
+ MadeChange = true;
+ else
+ ++MBBI;
+ }
+ }
+ return MadeChange;
+}
+
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.  On success, advances MBBI past the merged code and returns
+// true; otherwise leaves MBBI untouched and returns false.
+bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator End = MI.getParent()->end();
+
+  // Bail out on instructions that cannot legally participate in a pair.
+  if (!TII->isLdStSafeToPair(MI, TRI))
+    return false;
+
+  // Scan forward for a second access forming a contiguous pair.
+  bool MergeForward;
+  MachineBasicBlock::iterator Match = findMatchingInsn(MBBI, MergeForward);
+  if (Match == End)
+    return false;
+
+  MBBI = mergePairedInsns(MBBI, Match, MergeForward);
+  return true;
+}
+
+// Try to replace the adjacent pair (First, Second) with one hardware
+// load/store-pair instruction (LWP/LDP/SWP/SDP).  Returns true and erases
+// the originals on success; leaves the code unchanged otherwise.
+bool RISCVLoadStoreOpt::tryConvertToLdStPair(
+    MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+  if (!UseLoadStorePair)
+    return false;
+
+  // Only plain word/double-word loads and stores have a paired form.
+  unsigned PairOpc;
+  switch (First->getOpcode()) {
+  default:
+    return false;
+  case RISCV::SW:
+    PairOpc = RISCV::SWP;
+    break;
+  case RISCV::LW:
+    PairOpc = RISCV::LWP;
+    break;
+  case RISCV::SD:
+    PairOpc = RISCV::SDP;
+    break;
+  case RISCV::LD:
+    PairOpc = RISCV::LDP;
+    break;
+  }
+
+  MachineFunction *MF = First->getMF();
+  const MachineMemOperand *MMO = *First->memoperands_begin();
+  Align MMOAlign = MMO->getAlign();
+  // Fixed-stack objects are realigned to the stack alignment, so use that
+  // instead of the (possibly smaller) alignment recorded on the operand.
+  if (const PseudoSourceValue *Source = MMO->getPseudoValue())
+    if (Source->kind() == PseudoSourceValue::FixedStack)
+      MMOAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
+
+  // The pair must be aligned to twice the element size, and the offset must
+  // fit the 7-bit unsigned, size-aligned immediate of the paired encoding.
+  if (MMOAlign < Align(MMO->getSize().getValue() * 2))
+    return false;
+  int64_t Offset = First->getOperand(2).getImm();
+  if (!isUInt<7>(Offset) ||
+      !isAligned(Align(MMO->getSize().getValue()), Offset))
+    return false;
+  // Prefer First's debug location; fall back to Second's when First has none.
+  MachineInstrBuilder MIB = BuildMI(
+      *MF,
+      First->getDebugLoc().get() ? First->getDebugLoc() : Second->getDebugLoc(),
+      TII->get(PairOpc));
+  MIB.add(First->getOperand(0))
+      .add(Second->getOperand(0))
+      .add(First->getOperand(1))
+      .add(First->getOperand(2))
+      .cloneMergedMemRefs({&*First, &*Second});
+
+  First->getParent()->insert(First, MIB);
+
+  // Erase (not merely unlink) the originals so they are destroyed; the
+  // previous removeFromParent() calls left the instructions dangling.
+  First->eraseFromParent();
+  Second->eraseFromParent();
+
+  return true;
+}
+
+/// TODO: Move to lambda
+/// Return true if \p MIa may alias any instruction already collected in
+/// \p MemInsns (TBAA is deliberately not consulted post-RA).
+static bool mayAlias(MachineInstr &MIa,
+                     SmallVectorImpl<MachineInstr *> &MemInsns,
+                     AliasAnalysis *AA) {
+  bool Aliases = false;
+  for (MachineInstr *MIb : MemInsns) {
+    if (!MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+      continue;
+    Aliases = true;
+    break;
+  }
+  return Aliases;
+}
+
+/// Scan the instructions looking for a load/store that can be combined with
+/// the current instruction into a wider equivalent or a load/store pair.
+/// On success, \p MergeForward tells the caller whether the first
+/// instruction moves down to the match (true) or the match moves up (false).
+/// Returns the matching instruction, or the block's end() if none is found.
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
+                                    bool &MergeForward) {
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator MBBI = I;
+  MachineInstr &FirstMI = *I;
+  MBBI = next_nodbg(MBBI, E);
+
+  bool MayLoad = FirstMI.mayLoad();
+  Register Reg = FirstMI.getOperand(0).getReg();
+  Register BaseReg = FirstMI.getOperand(1).getReg();
+  int Offset = FirstMI.getOperand(2).getImm();
+  // Two accesses are contiguous when their offsets differ by the access size.
+  int OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
+
+  MergeForward = false;
+
+  // Track which register units have been modified and used between the first
+  // insn (inclusive) and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  // Remember any instructions that read/write memory between FirstMI and MI.
+  SmallVector<MachineInstr *, 4> MemInsns;
+
+  for (unsigned Count = 0; MBBI != E && Count < 128;
+       MBBI = next_nodbg(MBBI, E)) {
+    MachineInstr &MI = *MBBI;
+
+    // Don't count transient instructions towards the search limit since there
+    // may be different numbers of them if e.g. debug information is present.
+    if (!MI.isTransient())
+      ++Count;
+
+    if (MI.getOpcode() == FirstMI.getOpcode() &&
+        TII->isLdStSafeToPair(MI, TRI)) {
+      Register MIBaseReg = MI.getOperand(1).getReg();
+      int MIOffset = MI.getOperand(2).getImm();
+
+      if (BaseReg == MIBaseReg) {
+
+        // Not adjacent to FirstMI: record it as a memory instruction and
+        // keep looking.
+        if ((Offset != MIOffset + OffsetStride) &&
+            (Offset + OffsetStride != MIOffset)) {
+          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                            TRI);
+          MemInsns.push_back(&MI);
+          continue;
+        }
+
+        // If the destination register of one load is the same register or a
+        // sub/super register of the other load, bail and keep looking.
+        if (MayLoad &&
+            TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
+          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                            TRI);
+          MemInsns.push_back(&MI);
+          continue;
+        }
+
+        // If the BaseReg has been modified, then we cannot do the optimization.
+        if (!ModifiedRegUnits.available(BaseReg))
+          return E;
+
+        // If the Rt of the second instruction was not modified or used between
+        // the two instructions and none of the instructions between the second
+        // and first alias with the second, we can combine the second into the
+        // first.
+        if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
+            !(MI.mayLoad() &&
+              !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
+            !mayAlias(MI, MemInsns, AA)) {
+
+          MergeForward = false;
+          return MBBI;
+        }
+
+        // Likewise, if the Rt of the first instruction is not modified or used
+        // between the two instructions and none of the instructions between the
+        // first and the second alias with the first, we can combine the first
+        // into the second.
+        if (!(MayLoad &&
+              !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
+            !mayAlias(FirstMI, MemInsns, AA)) {
+
+          if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
+            MergeForward = true;
+            return MBBI;
+          }
+        }
+        // Unable to combine these instructions due to interference in between.
+        // Keep looking.
+      }
+    }
+
+    // If the instruction wasn't a matching load or store. Stop searching if we
+    // encounter a call instruction that might modify memory.
+    if (MI.isCall())
+      return E;
+
+    // Update modified / uses register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+
+    // Otherwise, if the base register is modified, we have no match, so
+    // return early.
+    if (!ModifiedRegUnits.available(BaseReg))
+      return E;
+
+    // Update list of instructions that read/write memory.
+    if (MI.mayLoadOrStore())
+      MemInsns.push_back(&MI);
+  }
+  return E;
+}
+
+// Move one of (I, Paired) next to the other, then either replace the two
+// with a single paired instruction or bundle them so later passes keep them
+// adjacent for hardware load/store bonding.  Returns the iterator from
+// which the caller should resume scanning.
+// (Dropped the stray __attribute__((noinline)) debugging leftover: it is a
+// GCC-ism that breaks non-GNU builds and served no purpose here.)
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                                    MachineBasicBlock::iterator Paired,
+                                    bool MergeForward) {
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  // Skip over Paired: it is about to be moved/merged away.
+  if (NextI == Paired)
+    NextI = next_nodbg(NextI, E);
+
+  // Insert our new paired instruction after whichever of the paired
+  // instructions MergeForward indicates.
+  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+  MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
+  // Keep the lower-offset access first so the pair covers ascending
+  // addresses.
+  int Offset = I->getOperand(2).getImm();
+  int PairedOffset = Paired->getOperand(2).getImm();
+  bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
+
+  if (!MergeForward)
+    Paired->getOperand(1).setIsKill(false);
+
+  // Kill flags may become invalid when moving stores for pairing.
+  if (I->getOperand(0).isUse()) {
+    if (!MergeForward) {
+      // Clear kill flags on store if moving upwards.
+      I->getOperand(0).setIsKill(false);
+      Paired->getOperand(0).setIsKill(false);
+    } else {
+      // Clear kill flags of the first stores register.
+      Register Reg = I->getOperand(0).getReg();
+      for (MachineInstr &MI : make_range(std::next(I), Paired))
+        MI.clearRegisterKills(Reg, TRI);
+    }
+  }
+
+  MachineInstr *ToInsert = DeletionPoint->removeFromParent();
+  MachineBasicBlock &MBB = *InsertionPoint->getParent();
+  MachineBasicBlock::iterator First, Second;
+
+  if (!InsertAfter) {
+    First = MBB.insert(InsertionPoint, ToInsert);
+    Second = InsertionPoint;
+  } else {
+    Second = MBB.insertAfter(InsertionPoint, ToInsert);
+    First = InsertionPoint;
+  }
+
+  // If a real paired instruction cannot be emitted, bundle the two so the
+  // scheduler keeps them adjacent.
+  if (!tryConvertToLdStPair(First, Second))
+    finalizeBundle(MBB, First.getInstrIterator(),
+                   std::next(Second).getInstrIterator());
+
+  LLVM_DEBUG(dbgs() << "Bonding pair load/store:\n ");
+  LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
+  return NextI;
+}
+
+/// Returns a new instance of the RISC-V Load / Store Optimization pass,
+/// which pairs or bundles adjacent loads/stores for load/store bonding.
+FunctionPass *llvm::createRISCVLoadStoreOptPass() {
+ return new RISCVLoadStoreOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index cae4fdd88d558f..8a5a9b4f19ecb6 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -88,7 +88,9 @@ def MIPS_P8700 : RISCVProcessorModel<"mips-p8700",
FeatureStdExtD,
FeatureStdExtC,
FeatureStdExtZba,
- FeatureStdExtZbb],
+ FeatureStdExtZbb,
+ FeatureMIPSCMov,
+ FeatureMIPSLoadStorePairs],
[TuneMIPSP8700]>;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 426d3682049046..cd53ce975429c6 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -62,6 +62,15 @@ static cl::opt<unsigned> RISCVMinimumJumpTableEntries(
"riscv-min-jump-table-entries", cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on RISCV"));
+static cl::opt<bool>
+ UseLoadStorePairsOpt("riscv-load-store-pairs",
+ cl::desc("RISCV: Optimize for load-store bonding"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> UseCCMovInsn("riscv-ccmov",
+ cl::desc("RISCV: Use 'ccmov' instruction"),
+ cl::init(true), cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -207,3 +216,11 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
// register-pressure tracking. This will increase compile time.
Policy.ShouldTrackPressure = true;
}
+
+// True only when the subtarget has the MIPS load/store-pair feature AND the
+// -riscv-load-store-pairs option (off by default, see above) enables it.
+bool RISCVSubtarget::useLoadStorePairs() const {
+ return UseLoadStorePairsOpt && HasMIPSLSP;
+}
+
+// True when the subtarget has the MIPS conditional-move feature and
+// -riscv-ccmov (on by default, see above) has not disabled it.
+bool RISCVSubtarget::useCCMovInsn() const {
+ return UseCCMovInsn && HasMIPSCMov;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 043838e13b964d..057b7769db6951 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -172,6 +172,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned getXLen() const {
return is64Bit() ? 64 : 32;
}
+ bool useLoadStorePairs() const;
+ bool useCCMovInsn() const;
unsigned getFLen() const {
if (HasStdExtD)
return 64;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index fa507653264ccd..d361e20346184f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -140,6 +140,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVDAGToDAGISelLegacyPass(*PR);
initializeRISCVMoveMergePass(*PR);
initializeRISCVPushPopOptPass(*PR);
+ initializeRISCVLoadStoreOptPass(*PR);
}
static StringRef computeDataLayout(const Triple &TT,
@@ -367,6 +368,17 @@ class RISCVPassConfig : public TargetPassConfig {
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}
+
+  // Query the subtarget once; the previous version redeclared an identical
+  // `ST` inside the if-body, shadowing this one.
+  const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+  if (!ST.getMacroFusions().empty()) {
+    DAG = DAG ? DAG : createGenericSchedLive(C);
+
+    // NOTE(review): load/store clustering is only applied when macro fusions
+    // are present -- confirm that gating is intended rather than an artifact
+    // of where this block was inserted.
+    if (ST.useLoadStorePairs()) {
+      DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+      DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+    }
+  }
return DAG;
}
@@ -540,6 +552,9 @@ void RISCVPassConfig::addPreSched2() {
// Emit KCFI checks for indirect calls.
addPass(createKCFIPass());
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
+ addPass(createRISCVLoadStoreOptPass());
+ }
}
void RISCVPassConfig::addPreEmitPass() {
@@ -553,6 +568,11 @@ void RISCVPassConfig::addPreEmitPass() {
addPass(createMachineCopyPropagationPass(true));
addPass(&BranchRelaxationPassID);
addPass(createRISCVMakeCompressibleOptPass());
+
+ // LoadStoreOptimizer creates bundles for load-store bonding.
+ addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
+ return MF.getSubtarget<RISCVSubtarget>().useLoadStorePairs();
+ }));
}
void RISCVPassConfig::addPreEmitPass2() {
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll
index 9584270d8e66f5..f3d6c01f35e55f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll
@@ -96,7 +96,7 @@ define i64 @zextw_i64(i64 %a) nounwind {
;
; RV64ZBA-LABEL: zextw_i64:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%and = and i64 %a, 4294967295
ret i64 %and
@@ -117,7 +117,7 @@ define i64 @zextw_demandedbits_i64(i64 %0) {
; RV64ZBA-LABEL: zextw_demandedbits_i64:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: li a1, -2
-; RV64ZBA-NEXT: zext.w a1, a1
+; RV64ZBA-NEXT: add.uw a1, a1, zero
; RV64ZBA-NEXT: and a0, a0, a1
; RV64ZBA-NEXT: ori a0, a0, 1
; RV64ZBA-NEXT: ret
@@ -396,14 +396,14 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 56
; RV64ZBANOZBB-NEXT: srai a0, a0, 63
-; RV64ZBANOZBB-NEXT: zext.w a0, a0
+; RV64ZBANOZBB-NEXT: add.uw a0, a0, zero
; RV64ZBANOZBB-NEXT: ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i8:
; RV64ZBAZBB: # %bb.0:
; RV64ZBAZBB-NEXT: sext.b a0, a0
; RV64ZBAZBB-NEXT: srai a0, a0, 9
-; RV64ZBAZBB-NEXT: zext.w a0, a0
+; RV64ZBAZBB-NEXT: add.uw a0, a0, zero
; RV64ZBAZBB-NEXT: ret
%ext = sext i8 %a to i32
%1 = ashr i32 %ext, 9
@@ -516,14 +516,14 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 48
; RV64ZBANOZBB-NEXT: srai a0, a0, 57
-; RV64ZBANOZBB-NEXT: zext.w a0, a0
+; RV64ZBANOZBB-NEXT: add.uw a0, a0, zero
; RV64ZBANOZBB-NEXT: ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i16:
; RV64ZBAZBB: # %bb.0:
; RV64ZBAZBB-NEXT: sext.h a0, a0
; RV64ZBAZBB-NEXT: srai a0, a0, 9
-; RV64ZBAZBB-NEXT: zext.w a0, a0
+; RV64ZBAZBB-NEXT: add.uw a0, a0, zero
; RV64ZBAZBB-NEXT: ret
%ext = sext i16 %a to i32
%1 = ashr i32 %ext, 9
@@ -1011,7 +1011,7 @@ define i64 @pack_i64(i64 %a, i64 %b) nounwind {
;
; RV64ZBA-LABEL: pack_i64:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: slli a1, a1, 32
; RV64ZBA-NEXT: or a0, a1, a0
; RV64ZBA-NEXT: ret
@@ -1034,8 +1034,8 @@ define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
;
; RV64ZBA-LABEL: pack_i64_2:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a0, a0
-; RV64ZBA-NEXT: zext.w a1, a1
+; RV64ZBA-NEXT: add.uw a0, a0, zero
+; RV64ZBA-NEXT: add.uw a1, a1, zero
; RV64ZBA-NEXT: slli a1, a1, 32
; RV64ZBA-NEXT: or a0, a1, a0
; RV64ZBA-NEXT: ret
@@ -1056,7 +1056,7 @@ define i64 @pack_i64_disjoint(i64 %a, i64 %b) nounwind {
;
; RV64ZBA-LABEL: pack_i64_disjoint:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: or a0, a1, a0
; RV64ZBA-NEXT: ret
%shl = and i64 %a, 4294967295
@@ -1074,7 +1074,7 @@ define i64 @pack_i64_disjoint_2(i32 signext %a, i64 %b) nounwind {
;
; RV64ZBA-LABEL: pack_i64_disjoint_2:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: or a0, a1, a0
; RV64ZBA-NEXT: ret
%zexta = zext i32 %a to i64
@@ -1450,7 +1450,7 @@ define ptr @gep_lshr_i32(ptr %0, i64 %1) {
; RV64ZBA-LABEL: gep_lshr_i32:
; RV64ZBA: # %bb.0: # %entry
; RV64ZBA-NEXT: srli a1, a1, 2
-; RV64ZBA-NEXT: zext.w a1, a1
+; RV64ZBA-NEXT: add.uw a1, a1, zero
; RV64ZBA-NEXT: li a2, 80
; RV64ZBA-NEXT: mul a1, a1, a2
; RV64ZBA-NEXT: add a0, a0, a1
@@ -1671,9 +1671,9 @@ define i64 @add_u32simm32_zextw(i64 %x) nounwind {
; RV64ZBA-LABEL: add_u32simm32_zextw:
; RV64ZBA: # %bb.0: # %entry
; RV64ZBA-NEXT: li a1, -2
-; RV64ZBA-NEXT: zext.w a1, a1
+; RV64ZBA-NEXT: add.uw a1, a1, zero
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
entry:
%add = add i64 %x, 4294967294
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index f60def9d546f81..5ee6c192b80291 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -63,6 +63,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
+; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 8fd9ae98503665..0dd3dbbafc336c 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -176,6 +176,7 @@
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass
; CHECK-NEXT: Insert KCFI indirect call checks
+; CHECK-NEXT: RISCV Load / Store Optimizer
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: PostRA Machine Instruction Scheduler
@@ -189,6 +190,7 @@
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
+; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll
new file mode 100644
index 00000000000000..ac62e80334e1ef
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll
@@ -0,0 +1,509 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32D
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I_PAIR
+; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d,+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32D_PAIR
+; RUN: llc -mtriple=riscv64 -mattr=+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I_PAIR
+; RUN: llc -mtriple=riscv64 -mcpu mips-p8700 -mattr=+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64P_8700
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d,+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D_PAIR
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D_8700
+
+define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testi:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset s2, -4
+; RV32I-NEXT: .cfi_offset s3, -8
+; RV32I-NEXT: .cfi_offset s4, -12
+; RV32I-NEXT: .cfi_offset s5, -16
+; RV32I-NEXT: lw s3, 0(a0)
+; RV32I-NEXT: lw s2, 4(a0)
+; RV32I-NEXT: lw s5, 8(a0)
+; RV32I-NEXT: lw s4, 12(a0)
+; RV32I-NEXT: #APP
+; RV32I-NEXT: #NO_APP
+; RV32I-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV32D-LABEL: testi:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: addi sp, sp, -16
+; RV32D-NEXT: .cfi_def_cfa_offset 16
+; RV32D-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32D-NEXT: .cfi_offset s2, -4
+; RV32D-NEXT: .cfi_offset s3, -8
+; RV32D-NEXT: .cfi_offset s4, -12
+; RV32D-NEXT: .cfi_offset s5, -16
+; RV32D-NEXT: lw s3, 0(a0)
+; RV32D-NEXT: lw s2, 4(a0)
+; RV32D-NEXT: lw s5, 8(a0)
+; RV32D-NEXT: lw s4, 12(a0)
+; RV32D-NEXT: #APP
+; RV32D-NEXT: #NO_APP
+; RV32D-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32D-NEXT: .cfi_restore s2
+; RV32D-NEXT: .cfi_restore s3
+; RV32D-NEXT: .cfi_restore s4
+; RV32D-NEXT: .cfi_restore s5
+; RV32D-NEXT: addi sp, sp, 16
+; RV32D-NEXT: .cfi_def_cfa_offset 0
+; RV32D-NEXT: ret
+;
+; RV64I-LABEL: testi:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset s2, -8
+; RV64I-NEXT: .cfi_offset s3, -16
+; RV64I-NEXT: .cfi_offset s4, -24
+; RV64I-NEXT: .cfi_offset s5, -32
+; RV64I-NEXT: ld s3, 0(a0)
+; RV64I-NEXT: ld s2, 8(a0)
+; RV64I-NEXT: ld s5, 16(a0)
+; RV64I-NEXT: ld s4, 24(a0)
+; RV64I-NEXT: #APP
+; RV64I-NEXT: #NO_APP
+; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: .cfi_restore s4
+; RV64I-NEXT: .cfi_restore s5
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV64D-LABEL: testi:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: addi sp, sp, -32
+; RV64D-NEXT: .cfi_def_cfa_offset 32
+; RV64D-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D-NEXT: .cfi_offset s2, -8
+; RV64D-NEXT: .cfi_offset s3, -16
+; RV64D-NEXT: .cfi_offset s4, -24
+; RV64D-NEXT: .cfi_offset s5, -32
+; RV64D-NEXT: ld s3, 0(a0)
+; RV64D-NEXT: ld s2, 8(a0)
+; RV64D-NEXT: ld s5, 16(a0)
+; RV64D-NEXT: ld s4, 24(a0)
+; RV64D-NEXT: #APP
+; RV64D-NEXT: #NO_APP
+; RV64D-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D-NEXT: .cfi_restore s2
+; RV64D-NEXT: .cfi_restore s3
+; RV64D-NEXT: .cfi_restore s4
+; RV64D-NEXT: .cfi_restore s5
+; RV64D-NEXT: addi sp, sp, 32
+; RV64D-NEXT: .cfi_def_cfa_offset 0
+; RV64D-NEXT: ret
+;
+; RV32I_PAIR-LABEL: testi:
+; RV32I_PAIR: # %bb.0: # %entry
+; RV32I_PAIR-NEXT: addi sp, sp, -16
+; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 16
+; RV32I_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill
+; RV32I_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill
+; RV32I_PAIR-NEXT: .cfi_offset s2, -4
+; RV32I_PAIR-NEXT: .cfi_offset s3, -8
+; RV32I_PAIR-NEXT: .cfi_offset s4, -12
+; RV32I_PAIR-NEXT: .cfi_offset s5, -16
+; RV32I_PAIR-NEXT: lwp s3, s2, 0(a0)
+; RV32I_PAIR-NEXT: lwp s5, s4, 8(a0)
+; RV32I_PAIR-NEXT: #APP
+; RV32I_PAIR-NEXT: #NO_APP
+; RV32I_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload
+; RV32I_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload
+; RV32I_PAIR-NEXT: .cfi_restore s2
+; RV32I_PAIR-NEXT: .cfi_restore s3
+; RV32I_PAIR-NEXT: .cfi_restore s4
+; RV32I_PAIR-NEXT: .cfi_restore s5
+; RV32I_PAIR-NEXT: addi sp, sp, 16
+; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV32I_PAIR-NEXT: ret
+;
+; RV32D_PAIR-LABEL: testi:
+; RV32D_PAIR: # %bb.0: # %entry
+; RV32D_PAIR-NEXT: addi sp, sp, -16
+; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 16
+; RV32D_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill
+; RV32D_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill
+; RV32D_PAIR-NEXT: .cfi_offset s2, -4
+; RV32D_PAIR-NEXT: .cfi_offset s3, -8
+; RV32D_PAIR-NEXT: .cfi_offset s4, -12
+; RV32D_PAIR-NEXT: .cfi_offset s5, -16
+; RV32D_PAIR-NEXT: lwp s3, s2, 0(a0)
+; RV32D_PAIR-NEXT: lwp s5, s4, 8(a0)
+; RV32D_PAIR-NEXT: #APP
+; RV32D_PAIR-NEXT: #NO_APP
+; RV32D_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload
+; RV32D_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload
+; RV32D_PAIR-NEXT: .cfi_restore s2
+; RV32D_PAIR-NEXT: .cfi_restore s3
+; RV32D_PAIR-NEXT: .cfi_restore s4
+; RV32D_PAIR-NEXT: .cfi_restore s5
+; RV32D_PAIR-NEXT: addi sp, sp, 16
+; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV32D_PAIR-NEXT: ret
+;
+; RV64I_PAIR-LABEL: testi:
+; RV64I_PAIR: # %bb.0: # %entry
+; RV64I_PAIR-NEXT: addi sp, sp, -32
+; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 32
+; RV64I_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64I_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64I_PAIR-NEXT: .cfi_offset s2, -8
+; RV64I_PAIR-NEXT: .cfi_offset s3, -16
+; RV64I_PAIR-NEXT: .cfi_offset s4, -24
+; RV64I_PAIR-NEXT: .cfi_offset s5, -32
+; RV64I_PAIR-NEXT: ld s3, 0(a0)
+; RV64I_PAIR-NEXT: ld s2, 8(a0)
+; RV64I_PAIR-NEXT: ld s5, 16(a0)
+; RV64I_PAIR-NEXT: ld s4, 24(a0)
+; RV64I_PAIR-NEXT: #APP
+; RV64I_PAIR-NEXT: #NO_APP
+; RV64I_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64I_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64I_PAIR-NEXT: .cfi_restore s2
+; RV64I_PAIR-NEXT: .cfi_restore s3
+; RV64I_PAIR-NEXT: .cfi_restore s4
+; RV64I_PAIR-NEXT: .cfi_restore s5
+; RV64I_PAIR-NEXT: addi sp, sp, 32
+; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV64I_PAIR-NEXT: ret
+;
+; RV64P_8700-LABEL: testi:
+; RV64P_8700: # %bb.0: # %entry
+; RV64P_8700-NEXT: addi sp, sp, -32
+; RV64P_8700-NEXT: .cfi_def_cfa_offset 32
+; RV64P_8700-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64P_8700-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64P_8700-NEXT: .cfi_offset s2, -8
+; RV64P_8700-NEXT: .cfi_offset s3, -16
+; RV64P_8700-NEXT: .cfi_offset s4, -24
+; RV64P_8700-NEXT: .cfi_offset s5, -32
+; RV64P_8700-NEXT: ld s3, 0(a0)
+; RV64P_8700-NEXT: ld s2, 8(a0)
+; RV64P_8700-NEXT: ld s5, 16(a0)
+; RV64P_8700-NEXT: ld s4, 24(a0)
+; RV64P_8700-NEXT: #APP
+; RV64P_8700-NEXT: #NO_APP
+; RV64P_8700-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64P_8700-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64P_8700-NEXT: .cfi_restore s2
+; RV64P_8700-NEXT: .cfi_restore s3
+; RV64P_8700-NEXT: .cfi_restore s4
+; RV64P_8700-NEXT: .cfi_restore s5
+; RV64P_8700-NEXT: addi sp, sp, 32
+; RV64P_8700-NEXT: .cfi_def_cfa_offset 0
+; RV64P_8700-NEXT: ret
+;
+; RV64D_PAIR-LABEL: testi:
+; RV64D_PAIR: # %bb.0: # %entry
+; RV64D_PAIR-NEXT: addi sp, sp, -32
+; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 32
+; RV64D_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64D_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64D_PAIR-NEXT: .cfi_offset s2, -8
+; RV64D_PAIR-NEXT: .cfi_offset s3, -16
+; RV64D_PAIR-NEXT: .cfi_offset s4, -24
+; RV64D_PAIR-NEXT: .cfi_offset s5, -32
+; RV64D_PAIR-NEXT: ld s3, 0(a0)
+; RV64D_PAIR-NEXT: ld s2, 8(a0)
+; RV64D_PAIR-NEXT: ld s5, 16(a0)
+; RV64D_PAIR-NEXT: ld s4, 24(a0)
+; RV64D_PAIR-NEXT: #APP
+; RV64D_PAIR-NEXT: #NO_APP
+; RV64D_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64D_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64D_PAIR-NEXT: .cfi_restore s2
+; RV64D_PAIR-NEXT: .cfi_restore s3
+; RV64D_PAIR-NEXT: .cfi_restore s4
+; RV64D_PAIR-NEXT: .cfi_restore s5
+; RV64D_PAIR-NEXT: addi sp, sp, 32
+; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV64D_PAIR-NEXT: ret
+;
+; RV64D_8700-LABEL: testi:
+; RV64D_8700: # %bb.0: # %entry
+; RV64D_8700-NEXT: addi sp, sp, -32
+; RV64D_8700-NEXT: .cfi_def_cfa_offset 32
+; RV64D_8700-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: .cfi_offset s2, -8
+; RV64D_8700-NEXT: .cfi_offset s3, -16
+; RV64D_8700-NEXT: .cfi_offset s4, -24
+; RV64D_8700-NEXT: .cfi_offset s5, -32
+; RV64D_8700-NEXT: ld s3, 0(a0)
+; RV64D_8700-NEXT: ld s2, 8(a0)
+; RV64D_8700-NEXT: ld s5, 16(a0)
+; RV64D_8700-NEXT: ld s4, 24(a0)
+; RV64D_8700-NEXT: #APP
+; RV64D_8700-NEXT: #NO_APP
+; RV64D_8700-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: .cfi_restore s2
+; RV64D_8700-NEXT: .cfi_restore s3
+; RV64D_8700-NEXT: .cfi_restore s4
+; RV64D_8700-NEXT: .cfi_restore s5
+; RV64D_8700-NEXT: addi sp, sp, 32
+; RV64D_8700-NEXT: .cfi_def_cfa_offset 0
+; RV64D_8700-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds i8*, i8** %a, i64 1
+ %0 = load i8*, i8** %arrayidx, align 8
+ %1 = load i8*, i8** %a, align 8
+ %arrayidx2 = getelementptr inbounds i8*, i8** %a, i64 3
+ %2 = load i8*, i8** %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds i8*, i8** %a, i64 2
+ %3 = load i8*, i8** %arrayidx3, align 8
+ tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(i8* %0, i8* %1, i8* %2, i8* %3)
+ ret void
+}
+
+
+define dso_local void @testf(float* nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testf:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a3, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a2, 8(a0)
+; RV32I-NEXT: lw a1, 12(a0)
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: tail sinkf
+;
+; RV32D-LABEL: testf:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: flw fa3, 0(a0)
+; RV32D-NEXT: flw fa0, 4(a0)
+; RV32D-NEXT: flw fa2, 8(a0)
+; RV32D-NEXT: flw fa1, 12(a0)
+; RV32D-NEXT: tail sinkf
+;
+; RV64I-LABEL: testf:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a4, 4(a0)
+; RV64I-NEXT: lw a2, 8(a0)
+; RV64I-NEXT: lw a1, 12(a0)
+; RV64I-NEXT: mv a0, a4
+; RV64I-NEXT: tail sinkf
+;
+; RV64D-LABEL: testf:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: flw fa3, 0(a0)
+; RV64D-NEXT: flw fa0, 4(a0)
+; RV64D-NEXT: flw fa2, 8(a0)
+; RV64D-NEXT: flw fa1, 12(a0)
+; RV64D-NEXT: tail sinkf
+;
+; RV32I_PAIR-LABEL: testf:
+; RV32I_PAIR: # %bb.0: # %entry
+; RV32I_PAIR-NEXT: lw a3, 0(a0)
+; RV32I_PAIR-NEXT: lw a4, 4(a0)
+; RV32I_PAIR-NEXT: lw a2, 8(a0)
+; RV32I_PAIR-NEXT: lw a1, 12(a0)
+; RV32I_PAIR-NEXT: mv a0, a4
+; RV32I_PAIR-NEXT: tail sinkf
+;
+; RV32D_PAIR-LABEL: testf:
+; RV32D_PAIR: # %bb.0: # %entry
+; RV32D_PAIR-NEXT: flw fa3, 0(a0)
+; RV32D_PAIR-NEXT: flw fa0, 4(a0)
+; RV32D_PAIR-NEXT: flw fa2, 8(a0)
+; RV32D_PAIR-NEXT: flw fa1, 12(a0)
+; RV32D_PAIR-NEXT: tail sinkf
+;
+; RV64I_PAIR-LABEL: testf:
+; RV64I_PAIR: # %bb.0: # %entry
+; RV64I_PAIR-NEXT: lw a3, 0(a0)
+; RV64I_PAIR-NEXT: lw a4, 4(a0)
+; RV64I_PAIR-NEXT: lw a2, 8(a0)
+; RV64I_PAIR-NEXT: lw a1, 12(a0)
+; RV64I_PAIR-NEXT: mv a0, a4
+; RV64I_PAIR-NEXT: tail sinkf
+;
+; RV64P_8700-LABEL: testf:
+; RV64P_8700: # %bb.0: # %entry
+; RV64P_8700-NEXT: flw fa3, 0(a0)
+; RV64P_8700-NEXT: flw fa0, 4(a0)
+; RV64P_8700-NEXT: flw fa2, 8(a0)
+; RV64P_8700-NEXT: flw fa1, 12(a0)
+; RV64P_8700-NEXT: tail sinkf
+;
+; RV64D_PAIR-LABEL: testf:
+; RV64D_PAIR: # %bb.0: # %entry
+; RV64D_PAIR-NEXT: flw fa3, 0(a0)
+; RV64D_PAIR-NEXT: flw fa0, 4(a0)
+; RV64D_PAIR-NEXT: flw fa2, 8(a0)
+; RV64D_PAIR-NEXT: flw fa1, 12(a0)
+; RV64D_PAIR-NEXT: tail sinkf
+;
+; RV64D_8700-LABEL: testf:
+; RV64D_8700: # %bb.0: # %entry
+; RV64D_8700-NEXT: flw fa3, 0(a0)
+; RV64D_8700-NEXT: flw fa0, 4(a0)
+; RV64D_8700-NEXT: flw fa2, 8(a0)
+; RV64D_8700-NEXT: flw fa1, 12(a0)
+; RV64D_8700-NEXT: tail sinkf
+entry:
+ %arrayidx = getelementptr inbounds float, float* %a, i64 1
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %a, i64 3
+ %1 = load float, float* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 2
+ %2 = load float, float* %arrayidx2, align 4
+ %3 = load float, float* %a, align 4
+ tail call void @sinkf(float noundef %0, float noundef %1, float noundef %2, float noundef %3)
+ ret void
+}
+
+declare dso_local void @sinkf(float noundef, float noundef, float noundef, float noundef) local_unnamed_addr
+
+define dso_local void @testd(double* nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testd:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a4, 16(a0)
+; RV32I-NEXT: lw a5, 20(a0)
+; RV32I-NEXT: lw a2, 24(a0)
+; RV32I-NEXT: lw a3, 28(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a7, 4(a0)
+; RV32I-NEXT: lw t0, 8(a0)
+; RV32I-NEXT: lw a1, 12(a0)
+; RV32I-NEXT: mv a0, t0
+; RV32I-NEXT: tail sinkd
+;
+; RV32D-LABEL: testd:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fld fa3, 0(a0)
+; RV32D-NEXT: fld fa0, 8(a0)
+; RV32D-NEXT: fld fa2, 16(a0)
+; RV32D-NEXT: fld fa1, 24(a0)
+; RV32D-NEXT: tail sinkd
+;
+; RV64I-LABEL: testd:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: ld a2, 16(a0)
+; RV64I-NEXT: ld a1, 24(a0)
+; RV64I-NEXT: mv a0, a4
+; RV64I-NEXT: tail sinkd
+;
+; RV64D-LABEL: testd:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: fld fa3, 0(a0)
+; RV64D-NEXT: fld fa0, 8(a0)
+; RV64D-NEXT: fld fa2, 16(a0)
+; RV64D-NEXT: fld fa1, 24(a0)
+; RV64D-NEXT: tail sinkd
+;
+; RV32I_PAIR-LABEL: testd:
+; RV32I_PAIR: # %bb.0: # %entry
+; RV32I_PAIR-NEXT: lwp a4, a5, 16(a0)
+; RV32I_PAIR-NEXT: lwp a2, a3, 24(a0)
+; RV32I_PAIR-NEXT: lwp a6, a7, 0(a0)
+; RV32I_PAIR-NEXT: lwp a0, a1, 8(a0)
+; RV32I_PAIR-NEXT: tail sinkd
+;
+; RV32D_PAIR-LABEL: testd:
+; RV32D_PAIR: # %bb.0: # %entry
+; RV32D_PAIR-NEXT: fld fa3, 0(a0)
+; RV32D_PAIR-NEXT: fld fa0, 8(a0)
+; RV32D_PAIR-NEXT: fld fa2, 16(a0)
+; RV32D_PAIR-NEXT: fld fa1, 24(a0)
+; RV32D_PAIR-NEXT: tail sinkd
+;
+; RV64I_PAIR-LABEL: testd:
+; RV64I_PAIR: # %bb.0: # %entry
+; RV64I_PAIR-NEXT: ld a3, 0(a0)
+; RV64I_PAIR-NEXT: ld a4, 8(a0)
+; RV64I_PAIR-NEXT: ld a2, 16(a0)
+; RV64I_PAIR-NEXT: ld a1, 24(a0)
+; RV64I_PAIR-NEXT: mv a0, a4
+; RV64I_PAIR-NEXT: tail sinkd
+;
+; RV64P_8700-LABEL: testd:
+; RV64P_8700: # %bb.0: # %entry
+; RV64P_8700-NEXT: fld fa3, 0(a0)
+; RV64P_8700-NEXT: fld fa0, 8(a0)
+; RV64P_8700-NEXT: fld fa2, 16(a0)
+; RV64P_8700-NEXT: fld fa1, 24(a0)
+; RV64P_8700-NEXT: tail sinkd
+;
+; RV64D_PAIR-LABEL: testd:
+; RV64D_PAIR: # %bb.0: # %entry
+; RV64D_PAIR-NEXT: fld fa3, 0(a0)
+; RV64D_PAIR-NEXT: fld fa0, 8(a0)
+; RV64D_PAIR-NEXT: fld fa2, 16(a0)
+; RV64D_PAIR-NEXT: fld fa1, 24(a0)
+; RV64D_PAIR-NEXT: tail sinkd
+;
+; RV64D_8700-LABEL: testd:
+; RV64D_8700: # %bb.0: # %entry
+; RV64D_8700-NEXT: fld fa3, 0(a0)
+; RV64D_8700-NEXT: fld fa0, 8(a0)
+; RV64D_8700-NEXT: fld fa2, 16(a0)
+; RV64D_8700-NEXT: fld fa1, 24(a0)
+; RV64D_8700-NEXT: tail sinkd
+entry:
+ %arrayidx = getelementptr inbounds double, double* %a, i64 1
+ %0 = load double, double* %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %a, i64 3
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 2
+ %2 = load double, double* %arrayidx2, align 8
+ %3 = load double, double* %a, align 8
+ tail call void @sinkd(double noundef %0, double noundef %1, double noundef %2, double noundef %3)
+ ret void
+}
+
+declare dso_local void @sinkd(double noundef, double noundef, double noundef, double noundef) local_unnamed_addr
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 9760821832b375..5c27b84181a496 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -3230,7 +3230,7 @@ define i64 @add_u32simm32_zextw(i64 %x) nounwind {
; RV64ZBA-LABEL: add_u32simm32_zextw:
; RV64ZBA: # %bb.0: # %entry
; RV64ZBA-NEXT: addi a0, a0, -2
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
entry:
%add = add i64 %x, 4294967294
diff --git a/llvm/test/CodeGen/RISCV/select-and.ll b/llvm/test/CodeGen/RISCV/select-and.ll
index d305993f0e966b..e35fcf3d3035e4 100644
--- a/llvm/test/CodeGen/RISCV/select-and.ll
+++ b/llvm/test/CodeGen/RISCV/select-and.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
;; There are a few different ways to lower (select (and A, B), X, Y). This test
;; ensures that we do so with as few branches as possible.
@@ -27,6 +29,12 @@ define signext i32 @select_of_and(i1 zeroext %a, i1 zeroext %b, i32 signext %c,
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_of_and:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: and a0, a0, a1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3
+; RV64I-CCMOV-NEXT: ret
%1 = and i1 %a, %b
%2 = select i1 %1, i32 %c, i32 %d
ret i32 %2
@@ -69,6 +77,23 @@ define signext i32 @if_of_and(i1 zeroext %a, i1 zeroext %b) nounwind {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: if_of_and:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: addi sp, sp, -16
+; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: beqz a0, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.1:
+; RV64I-CCMOV-NEXT: beqz a1, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.2: # %if.then
+; RV64I-CCMOV-NEXT: call both
+; RV64I-CCMOV-NEXT: j .LBB1_4
+; RV64I-CCMOV-NEXT: .LBB1_3: # %if.else
+; RV64I-CCMOV-NEXT: call neither
+; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end
+; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: addi sp, sp, 16
+; RV64I-CCMOV-NEXT: ret
%1 = and i1 %a, %b
br i1 %1, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/RISCV/select-bare.ll b/llvm/test/CodeGen/RISCV/select-bare.ll
index cf8fe96742bfbd..29acf8d347f104 100644
--- a/llvm/test/CodeGen/RISCV/select-bare.ll
+++ b/llvm/test/CodeGen/RISCV/select-bare.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
; RV32I-LABEL: bare_select:
@@ -12,6 +14,12 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: bare_select:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: andi a0, a0, 1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%1 = select i1 %a, i32 %b, i32 %c
ret i32 %1
}
@@ -26,6 +34,12 @@ define float @bare_select_float(i1 %a, float %b, float %c) nounwind {
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB1_2:
; RV32I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: bare_select_float:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: andi a0, a0, 1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%1 = select i1 %a, float %b, float %c
ret float %1
}
diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll
index 31e25702da8ba2..11f17831fd17e2 100644
--- a/llvm/test/CodeGen/RISCV/select-cc.ll
+++ b/llvm/test/CodeGen/RISCV/select-cc.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefixes=RV32I %s
; RUN: llc -mtriple=riscv64 -disable-block-placement -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
define signext i32 @foo(i32 signext %a, ptr %b) nounwind {
; RV32I-LABEL: foo:
@@ -156,6 +158,57 @@ define signext i32 @foo(i32 signext %a, ptr %b) nounwind {
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB0_28:
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: foo:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: lw a5, 0(a1)
+; RV64I-CCMOV-NEXT: xor a6, a0, a2
+; RV64I-CCMOV-NEXT: ccmov a0, a6, a2, a0
+; RV64I-CCMOV-NEXT: xor a2, a0, a3
+; RV64I-CCMOV-NEXT: ccmov a0, a2, a0, a3
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a3, a4, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a3, a0, a4
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a4, a0, a5
+; RV64I-CCMOV-NEXT: ccmov a0, a4, a5, a0
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a5, a0, a2
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a5, a3, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a4, a5
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a4
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a5, a2
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a2, a0
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a5, a3
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a3
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a4, a5
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a4, a0
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: slti a5, a2, 1
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2
+; RV64I-CCMOV-NEXT: slti a5, a2, 0
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0
+; RV64I-CCMOV-NEXT: lw a1, 0(a1)
+; RV64I-CCMOV-NEXT: slti a3, a4, 1025
+; RV64I-CCMOV-NEXT: ccmov a0, a3, a4, a0
+; RV64I-CCMOV-NEXT: sltiu a2, a2, 2047
+; RV64I-CCMOV-NEXT: ccmov a0, a2, a1, a0
+; RV64I-CCMOV-NEXT: sext.w a0, a0
+; RV64I-CCMOV-NEXT: ret
%val1 = load volatile i32, ptr %b
%tst1 = icmp eq i32 %a, %val1
%val2 = select i1 %tst1, i32 %a, i32 %val1
@@ -258,6 +311,23 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: numsignbits:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: addi sp, sp, -16
+; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: ccmov s0, a0, a2, a3
+; RV64I-CCMOV-NEXT: beqz a1, .LBB1_2
+; RV64I-CCMOV-NEXT: # %bb.1:
+; RV64I-CCMOV-NEXT: mv a0, s0
+; RV64I-CCMOV-NEXT: call bar
+; RV64I-CCMOV-NEXT: .LBB1_2:
+; RV64I-CCMOV-NEXT: mv a0, s0
+; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: addi sp, sp, 16
+; RV64I-CCMOV-NEXT: ret
%5 = icmp eq i16 %0, 0
%6 = select i1 %5, i16 %3, i16 %2
%7 = icmp eq i16 %1, 0
@@ -295,6 +365,14 @@ define i32 @select_sge_int16min(i32 signext %x, i32 signext %y, i32 signext %z)
; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_sge_int16min:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: lui a3, 1048560
+; RV64I-CCMOV-NEXT: addiw a3, a3, -1
+; RV64I-CCMOV-NEXT: slt a0, a3, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%a = icmp sge i32 %x, -65536
%b = select i1 %a, i32 %y, i32 %z
ret i32 %b
@@ -331,6 +409,14 @@ define i64 @select_sge_int32min(i64 %x, i64 %y, i64 %z) {
; RV64I-NEXT: .LBB3_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_sge_int32min:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: lui a3, 524288
+; RV64I-CCMOV-NEXT: addi a3, a3, -1
+; RV64I-CCMOV-NEXT: slt a0, a3, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%a = icmp sge i64 %x, -2147483648
%b = select i1 %a, i64 %y, i64 %z
ret i64 %b
diff --git a/llvm/test/CodeGen/RISCV/select-or.ll b/llvm/test/CodeGen/RISCV/select-or.ll
index 20a5ec15290cdb..924bb7ff9f97eb 100644
--- a/llvm/test/CodeGen/RISCV/select-or.ll
+++ b/llvm/test/CodeGen/RISCV/select-or.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
;; There are a few different ways to lower (select (or A, B), X, Y). This test
;; ensures that we do so with as few branches as possible.
@@ -27,6 +29,12 @@ define signext i32 @select_of_or(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_of_or:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: or a0, a0, a1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3
+; RV64I-CCMOV-NEXT: ret
%1 = or i1 %a, %b
%2 = select i1 %1, i32 %c, i32 %d
ret i32 %2
@@ -69,6 +77,23 @@ define signext i32 @if_of_or(i1 zeroext %a, i1 zeroext %b) nounwind {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: if_of_or:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: addi sp, sp, -16
+; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: bnez a0, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.1:
+; RV64I-CCMOV-NEXT: bnez a1, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.2: # %if.else
+; RV64I-CCMOV-NEXT: call neither
+; RV64I-CCMOV-NEXT: j .LBB1_4
+; RV64I-CCMOV-NEXT: .LBB1_3: # %if.then
+; RV64I-CCMOV-NEXT: call either
+; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end
+; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: addi sp, sp, 16
+; RV64I-CCMOV-NEXT: ret
%1 = or i1 %a, %b
br i1 %1, label %if.then, label %if.else
>From 4aa168aa0c7165abada121fc9e6a62d9d3ae6e4a Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Thu, 13 Jun 2024 15:46:57 +0200
Subject: [PATCH 3/6] [clang][RISCV] Set default CPU for `mti` vendor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
MipsTechnologies vendor is set up to use the p8700 processor by
default.
co-authored by Jovan Dmitrović <jovan.dmitrovic at htecgroup.com>
---
clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 4 ++++
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 13 +++++++++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index 6935904a24edbf..3363bb6803914c 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -364,5 +364,9 @@ std::string riscv::getRISCVTargetCPU(const llvm::opt::ArgList &Args,
if (!CPU.empty())
return CPU;
+ if (Triple.getVendor() == llvm::Triple::MipsTechnologies &&
+ Triple.isRISCV64())
+ return "mips-p8700";
+
return Triple.isRISCV64() ? "generic-rv64" : "generic-rv32";
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index cd53ce975429c6..c7a1405ed2d45e 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -79,8 +79,17 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU,
StringRef ABIName) {
// Determine default and user-specified characteristics
bool Is64Bit = TT.isArch64Bit();
- if (CPU.empty() || CPU == "generic")
- CPU = Is64Bit ? "generic-rv64" : "generic-rv32";
+ if (CPU.empty() || CPU == "generic") {
+ if (Is64Bit) {
+ if (TT.getVendor() == llvm::Triple::MipsTechnologies) {
+ CPU = "mips-p8700";
+ } else {
+ CPU = "generic-rv64";
+ }
+ } else {
+ CPU = "generic-rv32";
+ }
+ }
if (TuneCPU.empty())
TuneCPU = CPU;
>From c503eca8bb5fb273c8440cd3638f1142928daa2f Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Wed, 22 Jun 2022 21:16:47 +0200
Subject: [PATCH 4/6] [clang] Add driver support for riscv64-mti toolchains
Default to not using the integrated assembler, and adjust the assembler
output to be compatible with v0.97 GNU gas. Also add rudimentary big-endian support.
co-authored by Dragan Mladjenovic <Dragan.Mladjenovic at syrmia.com>
---
clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 6 +-
clang/lib/Driver/ToolChains/Gnu.cpp | 119 +++++++++++++++++-
clang/lib/Driver/ToolChains/Linux.cpp | 25 +++-
.../lib/Driver/ToolChains/RISCVToolchain.cpp | 18 ++-
clang/lib/Driver/ToolChains/RISCVToolchain.h | 1 +
.../RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp | 8 ++
.../RISCV/MCTargetDesc/RISCVMCAsmInfo.h | 2 +
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 2 +-
llvm/test/CodeGen/RISCV/div-by-constant.ll | 2 +-
.../CodeGen/RISCV/rv64i-exhaustive-w-insts.ll | 100 +++++++--------
llvm/test/CodeGen/RISCV/rv64zba.ll | 8 +-
llvm/test/CodeGen/RISCV/xaluo.ll | 22 ++--
llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll | 2 +-
llvm/test/MC/RISCV/rv64zba-aliases-valid.s | 4 +-
14 files changed, 236 insertions(+), 83 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index 3363bb6803914c..7a2408477e2d7b 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -336,12 +336,14 @@ std::string riscv::getRISCVArch(const llvm::opt::ArgList &Args,
// - On `riscv{XLEN}-unknown-elf` we default to `rv{XLEN}imac`
// - On all other OSs we use `rv{XLEN}imafdc` (equivalent to `rv{XLEN}gc`)
if (Triple.isRISCV32()) {
- if (Triple.getOS() == llvm::Triple::UnknownOS)
+ if (Triple.getOS() == llvm::Triple::UnknownOS &&
+ Triple.getVendor() != llvm::Triple::MipsTechnologies)
return "rv32imac";
else
return "rv32imafdc";
} else {
- if (Triple.getOS() == llvm::Triple::UnknownOS)
+ if (Triple.getOS() == llvm::Triple::UnknownOS &&
+ Triple.getVendor() != llvm::Triple::MipsTechnologies)
return "rv64imac";
else if (Triple.isAndroid())
return "rv64imafdcv_zba_zbb_zbs";
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 8397f1121ec2ce..5c3e23b1f7ff18 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -252,9 +252,17 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
case llvm::Triple::ppc64le:
return "elf64lppc";
case llvm::Triple::riscv32:
- return "elf32lriscv";
- case llvm::Triple::riscv64:
- return "elf64lriscv";
+ case llvm::Triple::riscv64: {
+ bool IsBigEndian = false;
+ if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
+ options::OPT_mbig_endian))
+ IsBigEndian = A->getOption().matches(options::OPT_mbig_endian);
+
+ if (T.getArch() == llvm::Triple::riscv32)
+ return IsBigEndian ? "elf32briscv" : "elf32lriscv";
+ else
+ return IsBigEndian ? "elf64briscv" : "elf64lriscv";
+ }
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
return "elf32_sparc";
@@ -402,6 +410,14 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL");
}
+ if (Triple.isRISCV() &&
+ Triple.getVendor() == llvm::Triple::MipsTechnologies) {
+ bool IsBigEndian = false;
+ if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
+ options::OPT_mbig_endian))
+ IsBigEndian = A->getOption().matches(options::OPT_mbig_endian);
+ CmdArgs.push_back(IsBigEndian ? "-EB" : "-EL");
+ }
// Most Android ARM64 targets should enable the linker fix for erratum
// 843419. Only non-Cortex-A53 devices are allowed to skip this flag.
if (Arch == llvm::Triple::aarch64 && (isAndroid || isOHOSFamily)) {
@@ -765,7 +781,8 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
}
case llvm::Triple::riscv32:
case llvm::Triple::riscv64: {
- StringRef ABIName = riscv::getRISCVABI(Args, getToolChain().getTriple());
+ const llvm::Triple &Triple = getToolChain().getTriple();
+ StringRef ABIName = riscv::getRISCVABI(Args, Triple);
CmdArgs.push_back("-mabi");
CmdArgs.push_back(ABIName.data());
std::string MArchName =
@@ -774,6 +791,14 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
CmdArgs.push_back(Args.MakeArgString(MArchName));
if (!Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true))
Args.addOptOutFlag(CmdArgs, options::OPT_mrelax, options::OPT_mno_relax);
+
+ if (Triple.getVendor() == llvm::Triple::MipsTechnologies) {
+ bool IsBigEndian = false;
+ if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
+ options::OPT_mbig_endian))
+ IsBigEndian = A->getOption().matches(options::OPT_mbig_endian);
+ CmdArgs.push_back(IsBigEndian ? "-EB" : "-EL");
+ }
break;
}
case llvm::Triple::sparc:
@@ -1873,9 +1898,18 @@ static void findRISCVBareMetalMultilibs(const Driver &D,
.flag(Twine("-march=", Element.march).str())
.flag(Twine("-mabi=", Element.mabi).str()));
}
+ SmallVector<MultilibBuilder, 2> Endian;
+ if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies) {
+ Endian.push_back(
+ MultilibBuilder("/riscv").flag("-EL").flag("-EB", /*Disallow=*/true));
+ Endian.push_back(
+ MultilibBuilder("/riscveb").flag("-EB").flag("-EL", /*Disallow=*/true));
+ }
MultilibSet RISCVMultilibs =
MultilibSetBuilder()
.Either(Ms)
+ .Either(Endian)
+ .Either(ArrayRef<MultilibBuilder>(Ms))
.makeMultilibSet()
.FilterOut(NonExistent)
.setFilePathsCallback([](const Multilib &M) {
@@ -1899,6 +1933,19 @@ static void findRISCVBareMetalMultilibs(const Driver &D,
}
}
+ bool IsBigEndian = false;
+ if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
+ options::OPT_mbig_endian))
+ IsBigEndian = A->getOption().matches(options::OPT_mbig_endian);
+
+ if (IsBigEndian) {
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << "-EB" << TargetTriple.str();
+ }
+
+ addMultilibFlag(IsBigEndian, "-EB", Flags);
+ addMultilibFlag(!IsBigEndian, "-EL", Flags);
+
if (selectRISCVMultilib(D, RISCVMultilibs, MArch, Flags,
Result.SelectedMultilibs))
Result.Multilibs = RISCVMultilibs;
@@ -1923,8 +1970,18 @@ static void findRISCVMultilibs(const Driver &D,
MultilibBuilder("lib64/lp64f").flag("-m64").flag("-mabi=lp64f");
MultilibBuilder Lp64d =
MultilibBuilder("lib64/lp64d").flag("-m64").flag("-mabi=lp64d");
+
+ SmallVector<MultilibBuilder, 2> Endian;
+ if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies) {
+ Endian.push_back(
+ MultilibBuilder("/riscv").flag("-EL").flag("-EB", /*Disallow=*/true));
+ Endian.push_back(
+ MultilibBuilder("/riscveb").flag("-EB").flag("-EL", /*Disallow=*/true));
+ }
+
MultilibSet RISCVMultilibs =
MultilibSetBuilder()
+ .Either(Endian)
.Either({Ilp32, Ilp32f, Ilp32d, Lp64, Lp64f, Lp64d})
.makeMultilibSet()
.FilterOut(NonExistent);
@@ -1932,6 +1989,15 @@ static void findRISCVMultilibs(const Driver &D,
Multilib::flags_list Flags;
bool IsRV64 = TargetTriple.getArch() == llvm::Triple::riscv64;
StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple);
+ bool IsBigEndian = false;
+ if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
+ options::OPT_mbig_endian))
+ IsBigEndian = A->getOption().matches(options::OPT_mbig_endian);
+
+ if (IsBigEndian) {
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << "-EB" << TargetTriple.str();
+ }
addMultilibFlag(!IsRV64, "-m32", Flags);
addMultilibFlag(IsRV64, "-m64", Flags);
@@ -1941,6 +2007,8 @@ static void findRISCVMultilibs(const Driver &D,
addMultilibFlag(ABIName == "lp64", "-mabi=lp64", Flags);
addMultilibFlag(ABIName == "lp64f", "-mabi=lp64f", Flags);
addMultilibFlag(ABIName == "lp64d", "-mabi=lp64d", Flags);
+ addMultilibFlag(IsBigEndian, "-EB", Flags);
+ addMultilibFlag(!IsBigEndian, "-EL", Flags);
if (RISCVMultilibs.select(D, Flags, Result.SelectedMultilibs))
Result.Multilibs = RISCVMultilibs;
@@ -2565,8 +2633,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
static const char *const RISCV32Triples[] = {"riscv32-unknown-linux-gnu",
"riscv32-unknown-elf"};
static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"};
- static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu",
- "riscv64-unknown-elf"};
+ static const char *const RISCV64Triples[] = {
+ "riscv64-unknown-linux-gnu", "riscv64-unknown-elf", "riscv64-mti-elf"};
static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"};
static const char *const SPARCv8Triples[] = {"sparc-linux-gnu",
@@ -3137,6 +3205,45 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const {
case llvm::Triple::nvptx64:
case llvm::Triple::xcore:
return false;
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
+ case llvm::Triple::amdgcn:
+ case llvm::Triple::arm:
+ case llvm::Triple::armeb:
+ case llvm::Triple::avr:
+ case llvm::Triple::bpfel:
+ case llvm::Triple::bpfeb:
+ case llvm::Triple::csky:
+ case llvm::Triple::hexagon:
+ case llvm::Triple::lanai:
+ case llvm::Triple::loongarch32:
+ case llvm::Triple::loongarch64:
+ case llvm::Triple::m68k:
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
+ case llvm::Triple::mips64:
+ case llvm::Triple::mips64el:
+ case llvm::Triple::msp430:
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppcle:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::ppc64le:
+ case llvm::Triple::r600:
+ case llvm::Triple::sparc:
+ case llvm::Triple::sparcel:
+ case llvm::Triple::sparcv9:
+ case llvm::Triple::systemz:
+ case llvm::Triple::thumb:
+ case llvm::Triple::thumbeb:
+ case llvm::Triple::ve:
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ return true;
+ case llvm::Triple::riscv32:
+ case llvm::Triple::riscv64:
+ if (getTriple().getVendor() != llvm::Triple::MipsTechnologies)
+ return true;
+ return false;
default:
return true;
}
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index d1cb625613415b..cf10f7a1918b35 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -274,11 +274,14 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
const bool IsHexagon = Arch == llvm::Triple::hexagon;
const bool IsRISCV = Triple.isRISCV();
const bool IsCSKY = Triple.isCSKY();
+ const bool IsMipsSysRoot =
+ IsMips ||
+ (IsRISCV && Triple.getVendor() == llvm::Triple::MipsTechnologies);
if (IsCSKY && !SelectedMultilibs.empty())
SysRoot = SysRoot + SelectedMultilibs.back().osSuffix();
- if ((IsMips || IsCSKY) && !SysRoot.empty())
+ if ((IsMipsSysRoot || IsCSKY) && !SysRoot.empty())
ExtraOpts.push_back("--sysroot=" + SysRoot);
// Do not use 'gnu' hash style for Mips targets because .gnu.hash
@@ -412,7 +415,12 @@ std::string Linux::computeSysRoot() const {
return std::string();
}
- if (!GCCInstallation.isValid() || !getTriple().isMIPS())
+ const bool IsMipsSysRoot =
+ getTriple().isMIPS() ||
+ (getTriple().isRISCV() &&
+ getTriple().getVendor() == llvm::Triple::MipsTechnologies);
+
+ if (!GCCInstallation.isValid() || !IsMipsSysRoot)
return std::string();
// Standalone MIPS toolchains use different names for sysroot folder
@@ -422,8 +430,19 @@ std::string Linux::computeSysRoot() const {
const StringRef InstallDir = GCCInstallation.getInstallPath();
const StringRef TripleStr = GCCInstallation.getTriple().str();
const Multilib &Multilib = GCCInstallation.getMultilib();
+ std::string Path;
+ if (getTriple().isRISCV()) {
+ Path =
+ (InstallDir + "/../../../../sysroot" + Multilib.osSuffix() + "/../..")
+ .str();
+
+ if (getVFS().exists(Path))
+ return Path;
+
+ return std::string();
+ }
- std::string Path =
+ Path =
(InstallDir + "/../../../../" + TripleStr + "/libc" + Multilib.osSuffix())
.str();
diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp
index 624099d21ae124..72b4fd04220e2a 100644
--- a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp
+++ b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp
@@ -77,8 +77,8 @@ Tool *RISCVToolChain::buildLinker() const {
}
ToolChain::RuntimeLibType RISCVToolChain::GetDefaultRuntimeLibType() const {
- return GCCInstallation.isValid() ?
- ToolChain::RLT_Libgcc : ToolChain::RLT_CompilerRT;
+ return GCCInstallation.isValid() ? ToolChain::RLT_Libgcc
+ : ToolChain::RLT_CompilerRT;
}
ToolChain::UnwindLibType
@@ -173,6 +173,14 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back("-X");
+ if (ToolChain.getTriple().getVendor() == llvm::Triple::MipsTechnologies) {
+ bool IsBigEndian = false;
+ if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
+ options::OPT_mbig_endian))
+ IsBigEndian = A->getOption().matches(options::OPT_mbig_endian);
+ CmdArgs.push_back(IsBigEndian ? "-EB" : "-EL");
+ }
+
std::string Linker = getToolChain().GetLinkerPath();
bool WantCRTs =
@@ -229,4 +237,10 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA,
JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
CmdArgs, Inputs, Output));
}
+
+bool RISCVToolChain::IsIntegratedAssemblerDefault() const {
+ if (getTriple().getVendor() != llvm::Triple::MipsTechnologies)
+ return true;
+ return false;
+}
// RISCV tools end.
diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.h b/clang/lib/Driver/ToolChains/RISCVToolchain.h
index fa0aa265d842bb..c189772cc0d0ca 100644
--- a/clang/lib/Driver/ToolChains/RISCVToolchain.h
+++ b/clang/lib/Driver/ToolChains/RISCVToolchain.h
@@ -22,6 +22,7 @@ class LLVM_LIBRARY_VISIBILITY RISCVToolChain : public Generic_ELF {
const llvm::opt::ArgList &Args);
static bool hasGCCToolchain(const Driver &D, const llvm::opt::ArgList &Args);
+ bool IsIntegratedAssemblerDefault() const override;
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind) const override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 7b927522d39549..30ad5c84e996b6 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -45,3 +45,11 @@ const MCExpr *RISCVMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding");
return RISCVMCExpr::create(ME, RISCVMCExpr::VK_RISCV_32_PCREL, Ctx);
}
+
+void RISCVMCAsmInfo::setUseIntegratedAssembler(bool Value) {
+ UseIntegratedAssembler = Value;
+ if (!UseIntegratedAssembler) {
+ // gas doesn't handle non-constant <u>leb128
+ HasLEB128Directives = false;
+ }
+}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
index bceeb1256471d9..ea444add6e4154 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
@@ -26,6 +26,8 @@ class RISCVMCAsmInfo : public MCAsmInfoELF {
const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const override;
+ /// Set whether assembly (inline or otherwise) should be parsed.
+ void setUseIntegratedAssembler(bool Value) override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index c57e7af3c56140..23713b6d49758d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -427,7 +427,7 @@ def UNZIP_RV32 : Unary_r<0b000010001111, 0b101, "unzip">,
// Pseudo Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZba, IsRV64] in {
+let Predicates = [HasStdExtZba, IsRV64], EmitPriority = 0 in {
def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>;
} // Predicates = [HasStdExtZba, IsRV64]
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 844fa0d1e6ad6e..598992d362f477 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -62,7 +62,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind {
;
; RV64IMZB-LABEL: udiv_constant_add:
; RV64IMZB: # %bb.0:
-; RV64IMZB-NEXT: zext.w a1, a0
+; RV64IMZB-NEXT: add.uw a1, a0, zero
; RV64IMZB-NEXT: lui a2, 149797
; RV64IMZB-NEXT: addiw a2, a2, -1755
; RV64IMZB-NEXT: mul a1, a1, a2
diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
index dad20b2d194645..3707eb45125a52 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
@@ -188,7 +188,7 @@ define zeroext i32 @zext_addw_aext_aext(i32 %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_addw_aext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -205,7 +205,7 @@ define zeroext i32 @zext_addw_aext_sext(i32 %a, i32 signext %b) nounwind {
; RV64ZBA-LABEL: zext_addw_aext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -222,7 +222,7 @@ define zeroext i32 @zext_addw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
; RV64ZBA-LABEL: zext_addw_aext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -239,7 +239,7 @@ define zeroext i32 @zext_addw_sext_aext(i32 signext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_addw_sext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -256,7 +256,7 @@ define zeroext i32 @zext_addw_sext_sext(i32 signext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_addw_sext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -273,7 +273,7 @@ define zeroext i32 @zext_addw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_addw_sext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -290,7 +290,7 @@ define zeroext i32 @zext_addw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_addw_zext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -307,7 +307,7 @@ define zeroext i32 @zext_addw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_addw_zext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -324,7 +324,7 @@ define zeroext i32 @zext_addw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_addw_zext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, %b
ret i32 %1
@@ -509,7 +509,7 @@ define zeroext i32 @zext_subw_aext_aext(i32 %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_subw_aext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -526,7 +526,7 @@ define zeroext i32 @zext_subw_aext_sext(i32 %a, i32 signext %b) nounwind {
; RV64ZBA-LABEL: zext_subw_aext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -543,7 +543,7 @@ define zeroext i32 @zext_subw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
; RV64ZBA-LABEL: zext_subw_aext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -560,7 +560,7 @@ define zeroext i32 @zext_subw_sext_aext(i32 signext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_subw_sext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -577,7 +577,7 @@ define zeroext i32 @zext_subw_sext_sext(i32 signext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_subw_sext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -594,7 +594,7 @@ define zeroext i32 @zext_subw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_subw_sext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -611,7 +611,7 @@ define zeroext i32 @zext_subw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_subw_zext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -628,7 +628,7 @@ define zeroext i32 @zext_subw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_subw_zext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -645,7 +645,7 @@ define zeroext i32 @zext_subw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_subw_zext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: subw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
ret i32 %1
@@ -828,7 +828,7 @@ define zeroext i32 @zext_sllw_aext_aext(i32 %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_sllw_aext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -845,7 +845,7 @@ define zeroext i32 @zext_sllw_aext_sext(i32 %a, i32 signext %b) nounwind {
; RV64ZBA-LABEL: zext_sllw_aext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -862,7 +862,7 @@ define zeroext i32 @zext_sllw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
; RV64ZBA-LABEL: zext_sllw_aext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -879,7 +879,7 @@ define zeroext i32 @zext_sllw_sext_aext(i32 signext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_sllw_sext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -896,7 +896,7 @@ define zeroext i32 @zext_sllw_sext_sext(i32 signext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_sllw_sext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -913,7 +913,7 @@ define zeroext i32 @zext_sllw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_sllw_sext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -930,7 +930,7 @@ define zeroext i32 @zext_sllw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_sllw_zext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -947,7 +947,7 @@ define zeroext i32 @zext_sllw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_sllw_zext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -964,7 +964,7 @@ define zeroext i32 @zext_sllw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_sllw_zext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sllw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = shl i32 %a, %b
ret i32 %1
@@ -1143,7 +1143,7 @@ define zeroext i32 @zext_srlw_aext_aext(i32 %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_srlw_aext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1160,7 +1160,7 @@ define zeroext i32 @zext_srlw_aext_sext(i32 %a, i32 signext %b) nounwind {
; RV64ZBA-LABEL: zext_srlw_aext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1177,7 +1177,7 @@ define zeroext i32 @zext_srlw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
; RV64ZBA-LABEL: zext_srlw_aext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1194,7 +1194,7 @@ define zeroext i32 @zext_srlw_sext_aext(i32 signext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_srlw_sext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1211,7 +1211,7 @@ define zeroext i32 @zext_srlw_sext_sext(i32 signext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_srlw_sext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1228,7 +1228,7 @@ define zeroext i32 @zext_srlw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_srlw_sext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1245,7 +1245,7 @@ define zeroext i32 @zext_srlw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_srlw_zext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1262,7 +1262,7 @@ define zeroext i32 @zext_srlw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_srlw_zext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1279,7 +1279,7 @@ define zeroext i32 @zext_srlw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_srlw_zext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srlw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = lshr i32 %a, %b
ret i32 %1
@@ -1458,7 +1458,7 @@ define zeroext i32 @zext_sraw_aext_aext(i32 %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_sraw_aext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1475,7 +1475,7 @@ define zeroext i32 @zext_sraw_aext_sext(i32 %a, i32 signext %b) nounwind {
; RV64ZBA-LABEL: zext_sraw_aext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1492,7 +1492,7 @@ define zeroext i32 @zext_sraw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
; RV64ZBA-LABEL: zext_sraw_aext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1509,7 +1509,7 @@ define zeroext i32 @zext_sraw_sext_aext(i32 signext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_sraw_sext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1526,7 +1526,7 @@ define zeroext i32 @zext_sraw_sext_sext(i32 signext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_sraw_sext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1543,7 +1543,7 @@ define zeroext i32 @zext_sraw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_sraw_sext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1560,7 +1560,7 @@ define zeroext i32 @zext_sraw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
; RV64ZBA-LABEL: zext_sraw_zext_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1577,7 +1577,7 @@ define zeroext i32 @zext_sraw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind
; RV64ZBA-LABEL: zext_sraw_zext_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1594,7 +1594,7 @@ define zeroext i32 @zext_sraw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind
; RV64ZBA-LABEL: zext_sraw_zext_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraw a0, a0, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, %b
ret i32 %1
@@ -1667,7 +1667,7 @@ define zeroext i32 @zext_addiw_aext(i32 %a) nounwind {
; RV64ZBA-LABEL: zext_addiw_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi a0, a0, 7
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, 7
ret i32 %1
@@ -1684,7 +1684,7 @@ define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind {
; RV64ZBA-LABEL: zext_addiw_sext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi a0, a0, 8
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, 8
ret i32 %1
@@ -1701,7 +1701,7 @@ define zeroext i32 @zext_addiw_zext(i32 zeroext %a) nounwind {
; RV64ZBA-LABEL: zext_addiw_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi a0, a0, 9
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = add i32 %a, 9
ret i32 %1
@@ -1944,7 +1944,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
; RV64ZBA-LABEL: zext_sraiw_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraiw a0, a0, 7
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, 7
ret i32 %1
@@ -1971,7 +1971,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind {
; RV64ZBA-LABEL: zext_sraiw_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sraiw a0, a0, 9
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, 9
ret i32 %1
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 5c27b84181a496..f8134c0adf5b73 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -93,7 +93,7 @@ define i64 @zextw_i64(i64 %a) nounwind {
;
; RV64ZBA-LABEL: zextw_i64:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%and = and i64 %a, 4294967295
ret i64 %and
@@ -112,7 +112,7 @@ define i64 @zextw_demandedbits_i64(i64 %0) {
; RV64ZBA-LABEL: zextw_demandedbits_i64:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: ori a0, a0, 1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
%2 = and i64 %0, 4294967294
%3 = or i64 %2, 1
@@ -1177,7 +1177,7 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
;
; RV64ZBA-LABEL: adduw_imm:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: addi a0, a0, 5
; RV64ZBA-NEXT: ret
%a = zext i32 %0 to i64
@@ -1244,7 +1244,7 @@ define i64 @imm_zextw() nounwind {
; RV64ZBA-LABEL: imm_zextw:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: li a0, -2
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: ret
ret i64 4294967294 ; -2 in 32 bits.
}
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index c0cbbb3ff9389e..cadaf71fbcda63 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1690,8 +1690,8 @@ define zeroext i1 @umulo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
;
; RV64ZBA-LABEL: umulo.i32:
; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: zext.w a1, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a1, a1, zero
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: mul a1, a0, a1
; RV64ZBA-NEXT: srli a0, a1, 32
; RV64ZBA-NEXT: snez a0, a0
@@ -1759,7 +1759,7 @@ define zeroext i1 @umulo2.i32(i32 signext %v1, ptr %res) {
;
; RV64ZBA-LABEL: umulo2.i32:
; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: zext.w a2, a0
+; RV64ZBA-NEXT: add.uw a2, a0, zero
; RV64ZBA-NEXT: sh1add.uw a0, a0, a2
; RV64ZBA-NEXT: sh2add a2, a0, a2
; RV64ZBA-NEXT: srli a0, a2, 32
@@ -1828,8 +1828,8 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) {
;
; RV64ZBA-LABEL: umulo3.i32:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: zext.w a1, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a1, a1, zero
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: mul a3, a0, a1
; RV64ZBA-NEXT: srli a3, a3, 32
; RV64ZBA-NEXT: snez a3, a3
@@ -3748,8 +3748,8 @@ define i32 @umulo.select.i32(i32 signext %v1, i32 signext %v2) {
;
; RV64ZBA-LABEL: umulo.select.i32:
; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: zext.w a2, a1
-; RV64ZBA-NEXT: zext.w a3, a0
+; RV64ZBA-NEXT: add.uw a2, a1, zero
+; RV64ZBA-NEXT: add.uw a3, a0, zero
; RV64ZBA-NEXT: mul a2, a3, a2
; RV64ZBA-NEXT: srli a2, a2, 32
; RV64ZBA-NEXT: bnez a2, .LBB48_2
@@ -3807,8 +3807,8 @@ define i1 @umulo.not.i32(i32 signext %v1, i32 signext %v2) {
;
; RV64ZBA-LABEL: umulo.not.i32:
; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: zext.w a1, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a1, a1, zero
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: mul a0, a0, a1
; RV64ZBA-NEXT: srli a0, a0, 32
; RV64ZBA-NEXT: seqz a0, a0
@@ -5323,8 +5323,8 @@ define zeroext i1 @umulo.br.i32(i32 signext %v1, i32 signext %v2) {
;
; RV64ZBA-LABEL: umulo.br.i32:
; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: zext.w a1, a1
-; RV64ZBA-NEXT: zext.w a0, a0
+; RV64ZBA-NEXT: add.uw a1, a1, zero
+; RV64ZBA-NEXT: add.uw a0, a0, zero
; RV64ZBA-NEXT: mul a0, a0, a1
; RV64ZBA-NEXT: srli a0, a0, 32
; RV64ZBA-NEXT: beqz a0, .LBB63_2
diff --git a/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll b/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll
index 545d6c6aca0414..639252428bb547 100644
--- a/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll
+++ b/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll
@@ -76,7 +76,7 @@ define i64 @c_zext_h(i64 %x, i16 %y) {
define i64 @c_zext_w(i64 %x, i32 %y) {
; CHECK-LABEL: c_zext_w:
; CHECK: # %bb.0:
-; CHECK-NEXT: zext.w a1, a1
+; CHECK-NEXT: add.uw a1, a1, zero
; CHECK-NEXT: li a0, 1234
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
diff --git a/llvm/test/MC/RISCV/rv64zba-aliases-valid.s b/llvm/test/MC/RISCV/rv64zba-aliases-valid.s
index bb8eeb41dde75e..1397f0aeacb830 100644
--- a/llvm/test/MC/RISCV/rv64zba-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rv64zba-aliases-valid.s
@@ -16,13 +16,13 @@
# aliases disabled
# CHECK-S-OBJ-NOALIAS: add.uw t0, t1, zero
-# CHECK-S-OBJ: zext.w t0, t1
+# CHECK-S-OBJ: add.uw t0, t1, zero
zext.w x5, x6
# CHECK-S-OBJ-NOALIAS: addi t1, zero, -2
# CHECK-S-OBJ-NOALIAS-NEXT: add.uw t1, t1, zero
# CHECK-S-OBJ: li t1, -2
-# CHECK-S-OBJ-NEXT: zext.w t1, t1
+# CHECK-S-OBJ-NEXT: add.uw t1, t1, zero
li x6, 0xfffffffe
# CHECK-S-OBJ-NOALIAS: lui t0, 768955
>From d35403fcad1b542429bb098ae02d7bf231c52178 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Thu, 29 Sep 2022 15:29:50 +0200
Subject: [PATCH 5/6] [RISCV] Enable SeparateConstOffsetFromGEPPass for RISC-V
We see benefits in terms of performance.
CoreMark benchmarking demonstrates a 2.5% performance improvement.
---
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 15 ++++++++++++++-
llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll | 10 ++++++++++
2 files changed, 24 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index d361e20346184f..6074c4461a8ebf 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -81,6 +81,10 @@ static cl::opt<bool> EnableRISCVCopyPropagation(
"riscv-enable-copy-propagation",
cl::desc("Enable the copy propagation with RISC-V copy instr"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableGEPOpt("riscv-enable-gep-opt", cl::Hidden,
+ cl::desc("Enable optimizations on complex GEPs"),
+ cl::init(false));
static cl::opt<bool> EnableRISCVDeadRegisterElimination(
"riscv-enable-dead-defs", cl::Hidden,
@@ -373,7 +377,6 @@ class RISCVPassConfig : public TargetPassConfig {
if (!ST.getMacroFusions().empty()) {
DAG = DAG ? DAG : createGenericSchedLive(C);
- const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
if (ST.useLoadStorePairs()) {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
@@ -466,6 +469,16 @@ void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandLegacyPass());
addPass(createRISCVZacasABIFixPass());
+ if (TM->getOptLevel() == CodeGenOptLevel::Aggressive && EnableGEPOpt) {
+ addPass(createSeparateConstOffsetFromGEPPass(false));
+ // Call EarlyCSE pass to find and remove subexpressions in the lowered
+ // result.
+ addPass(createEarlyCSEPass());
+ // Do loop invariant code motion in case part of the lowered result is
+ // invariant.
+ addPass(createLICMPass());
+ }
+
if (getOptLevel() != CodeGenOptLevel::None) {
if (EnableLoopDataPrefetch)
addPass(createLoopDataPrefetchPass());
diff --git a/llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll b/llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll
new file mode 100644
index 00000000000000..3191e7e38e5ce7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=riscv32 -O3 -riscv-enable-gep-opt=true -debug-pass=Structure < %s -o /dev/null 2>&1 | \
+; RUN: grep -v "Verify generated machine code" | \
+; RUN: FileCheck %s --check-prefixes=CHECK
+
+
+; REQUIRES: asserts
+
+; CHECK-LABEL: Pass Arguments:
+; CHECK: Split GEPs to a variadic base and a constant offset for better CSE
+
>From 4f3953147ab0f7c79cb1a90807fbddde8439fc71 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Mon, 6 Feb 2023 14:51:50 +0100
Subject: [PATCH 6/6] [RISCV] Add RemoveBackToBackBranches Pass
According to the MIPS specification, there shouldn't be
two conditional branches in the same 8-byte aligned
region of code.
---
clang/include/clang/Driver/Options.td | 1 +
clang/lib/Driver/ToolChains/Clang.cpp | 4 +
llvm/lib/Target/RISCV/CMakeLists.txt | 1 +
llvm/lib/Target/RISCV/RISCV.h | 3 +
.../RISCV/RISCVRemoveBackToBackBranches.cpp | 158 ++++++++++++++++++
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 9 +
llvm/lib/Target/RISCV/RISCVSubtarget.h | 5 +-
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 3 +-
.../riscv-remove-back-to-back-branches.mir | 108 ++++++++++++
llvm/test/CodeGen/RISCV/O0-pipeline.ll | 3 +-
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 3 +-
11 files changed, 292 insertions(+), 6 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVRemoveBackToBackBranches.cpp
create mode 100644 llvm/test/CodeGen/MIR/RISCV/riscv-remove-back-to-back-branches.mir
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5527be23aa3acd..a0873beeaebd92 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4941,6 +4941,7 @@ def mload_store_pairs : Flag<["-"], "mload-store-pairs">, Group<m_riscv_Features
def mno_load_store_pairs : Flag<["-"], "mno-load-store-pairs">, Group<m_riscv_Features_Group>;
def mccmov : Flag<["-"], "mccmov">, Group<m_riscv_Features_Group>;
def mno_ccmov : Flag<["-"], "mno-ccmov">, Group<m_riscv_Features_Group>;
+def mremove_back_to_back_branches : Flag<["-"], "mremove_back_to_back_branches">, Group<m_riscv_Features_Group>;
let Flags = [TargetSpecific] in {
def menable_experimental_extensions : Flag<["-"], "menable-experimental-extensions">, Group<m_Group>,
HelpText<"Enable use of experimental RISC-V extensions.">;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 56acc93a3ed9d7..dcf60f99e688ac 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2178,6 +2178,10 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args,
CmdArgs.push_back("-riscv-ccmov=0");
}
}
+ if (Args.getLastArg(options::OPT_mremove_back_to_back_branches)) {
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("-riscv-remove-back-to-back-branches=1");
+ }
// Handle -mrvv-vector-bits=<bits>
if (Arg *A = Args.getLastArg(options::OPT_mrvv_vector_bits_EQ)) {
StringRef Val = A->getValue();
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 5a353a1c690b9e..2f63d7f1e4c39f 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -55,6 +55,7 @@ add_llvm_target(RISCVCodeGen
RISCVMoveMerger.cpp
RISCVPushPopOptimizer.cpp
RISCVRegisterInfo.cpp
+ RISCVRemoveBackToBackBranches.cpp
RISCVSubtarget.cpp
RISCVTargetMachine.cpp
RISCVTargetObjectFile.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index b1aee98739e852..1f12f77e7dc1aa 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -96,6 +96,9 @@ createRISCVInstructionSelector(const RISCVTargetMachine &,
const RISCVRegisterBankInfo &);
void initializeRISCVDAGToDAGISelLegacyPass(PassRegistry &);
+FunctionPass *createRISCVRemoveBackToBackBranches();
+void initializeRISCVRemoveBackToBackBranchesPass(PassRegistry &);
+
FunctionPass *createRISCVPostLegalizerCombiner();
void initializeRISCVPostLegalizerCombinerPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVRemoveBackToBackBranches.cpp b/llvm/lib/Target/RISCV/RISCVRemoveBackToBackBranches.cpp
new file mode 100644
index 00000000000000..55b8d263f6f112
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVRemoveBackToBackBranches.cpp
@@ -0,0 +1,158 @@
+//===----------------------- RISCVRemoveBackToBackBranches.cpp ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-remove-back-to-back-branches"
+
+STATISTIC(NumInsertedAligments, "Number of aligments set");
+
+namespace {
+
+// According to the MIPS specification, there shouldn't be two conditional
+// branches in the same 8-byte aligned region of code.
+constexpr unsigned NumberOfBytesOfCodeRegion = 8;
+
+class RISCVRemoveBackToBackBranches : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVRemoveBackToBackBranches() : MachineFunctionPass(ID) {
+ initializeRISCVRemoveBackToBackBranchesPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "RISCV Remove Back To Back Branches Pass";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ const RISCVSubtarget *STI;
+ const RISCVInstrInfo *TII;
+};
+
+} // end of anonymous namespace
+
+char RISCVRemoveBackToBackBranches::ID = 0;
+
+INITIALIZE_PASS(RISCVRemoveBackToBackBranches, DEBUG_TYPE,
+ "Fix hazards by removing back to back branches", false, false)
+
+/// Returns a pass that clears pipeline hazards.
+FunctionPass *llvm::createRISCVRemoveBackToBackBranches() {
+ return new RISCVRemoveBackToBackBranches();
+}
+
+static bool CheckCompressedISA(MachineBasicBlock *MBB,
+ const RISCVInstrInfo *TII) {
+ unsigned SizeInBytes = 0;
+ for (auto &I : *MBB) {
+ // Skip some 0-sized meta instructions, such as debug ones.
+ if (!TII->getInstSizeInBytes(I))
+ continue;
+
+ SizeInBytes += TII->getInstSizeInBytes(I);
+
+ // This means that there is something other than the conditional branch
+ // here.
+ if (!I.isConditionalBranch())
+ continue;
+
+ // If it is a conditional branch, make sure it is the last one
+ // in this MBB and the cumulative size in bytes of other instructions in the
+ // block is <= 6 (since there potentially could be space for the two
+ // branches in the same 8-byte aligned code region, when compressed version
+ // of the instructions (16-bit size) is being used).
+ if (&I == &*MBB->getLastNonDebugInstr()) {
+ if (SizeInBytes <= 6)
+ return true;
+ return false;
+ }
+ }
+
+ return false;
+}
+
+static bool CheckNonCompressedISA(MachineBasicBlock *MBB,
+ const RISCVInstrInfo *TII) {
+ for (auto &I : *MBB) {
+ // Skip some 0-sized meta instructions, such as debug ones.
+ if (!TII->getInstSizeInBytes(I))
+ continue;
+
+ // This means that there is something other than the conditional branch
+ // here.
+ if (!I.isConditionalBranch())
+ return false;
+
+ // If it is a conditional branch, make sure it is the last one
+ // in this MBB.
+ if (&I == &*MBB->getLastNonDebugInstr())
+ return true;
+ return false;
+ }
+ return false;
+}
+
+bool RISCVRemoveBackToBackBranches::runOnMachineFunction(MachineFunction &MF) {
+ STI = &static_cast<const RISCVSubtarget &>(MF.getSubtarget());
+ TII = static_cast<const RISCVInstrInfo *>(STI->getInstrInfo());
+
+ if (!STI->shouldRemoveBackToBackBranches()) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "Ignoring RISCV Remove Back To Back Branches Pass\n");
+ return false;
+ }
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ auto BBTerminator = MBB.getFirstTerminator();
+ // If it is not a conditional branch, we are not interested.
+ if (BBTerminator == MBB.end() ||
+ &*BBTerminator != &*MBB.getLastNonDebugInstr() ||
+ !BBTerminator->isConditionalBranch())
+ continue;
+
+ for (auto &Successor : MBB.successors()) {
+ // Set up alignment in order to avoid hazards. No 2 conditional branches
+ // should be in the same 8-byte aligned region of code. Similar to the MIPS
+ // forbidden slots problem. We may want to insert a NOP only, but we
+ // need to account for the Compressed ISA, so it is safer to just set the
+ // alignment of the successor block if it meets the requirements.
+ bool ShouldSetAligment = STI->getFeatureBits()[RISCV::FeatureStdExtC]
+ ? CheckCompressedISA(Successor, TII)
+ : CheckNonCompressedISA(Successor, TII);
+ if (ShouldSetAligment) {
+ Successor->setAlignment(Align(NumberOfBytesOfCodeRegion));
+ Changed = true;
+ ++NumInsertedAligments;
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index c7a1405ed2d45e..96ff5975d48970 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -71,6 +71,11 @@ static cl::opt<bool> UseCCMovInsn("riscv-ccmov",
cl::desc("RISCV: Use 'ccmov' instruction"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> RISCVRemoveBackToBackBranches(
+ "riscv-remove-back-to-back-branches",
+ cl::desc("RISCV: Insert nops to clear pipeline hazards."), cl::init(false),
+ cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -233,3 +238,7 @@ bool RISCVSubtarget::useLoadStorePairs() const {
bool RISCVSubtarget::useCCMovInsn() const {
return UseCCMovInsn && HasMIPSCMov;
}
+
+bool RISCVSubtarget::shouldRemoveBackToBackBranches() const {
+ return RISCVRemoveBackToBackBranches && hasFeature(RISCV::TuneMIPSP8700);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 057b7769db6951..cbe28a0416ff66 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -169,9 +169,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
MVT getXLenVT() const {
return is64Bit() ? MVT::i64 : MVT::i32;
}
- unsigned getXLen() const {
- return is64Bit() ? 64 : 32;
- }
+ unsigned getXLen() const { return is64Bit() ? 64 : 32; }
+ bool shouldRemoveBackToBackBranches() const;
bool useLoadStorePairs() const;
bool useCCMovInsn() const;
unsigned getFLen() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 6074c4461a8ebf..57169161a8a3f0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -579,13 +579,14 @@ void RISCVPassConfig::addPreEmitPass() {
if (TM->getOptLevel() >= CodeGenOptLevel::Default &&
EnableRISCVCopyPropagation)
addPass(createMachineCopyPropagationPass(true));
- addPass(&BranchRelaxationPassID);
addPass(createRISCVMakeCompressibleOptPass());
// LoadStoreOptimizer creates bundles for load-store bonding.
addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
return MF.getSubtarget<RISCVSubtarget>().useLoadStorePairs();
}));
+ addPass(&BranchRelaxationPassID);
+ addPass(createRISCVRemoveBackToBackBranches());
}
void RISCVPassConfig::addPreEmitPass2() {
diff --git a/llvm/test/CodeGen/MIR/RISCV/riscv-remove-back-to-back-branches.mir b/llvm/test/CodeGen/MIR/RISCV/riscv-remove-back-to-back-branches.mir
new file mode 100644
index 00000000000000..448414678fa061
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/RISCV/riscv-remove-back-to-back-branches.mir
@@ -0,0 +1,108 @@
+# RUN: llc -mtriple=riscv64 -mattr=-c -riscv-remove-back-to-back-branches=1 -o - %s | FileCheck %s
+
+# CHECK: %bb.0:
+# CHECK: blez
+# CHECK: .p2align 3
+# CHECK: %bb.1:
+# CHECK: blez
+
+--- |
+ ; ModuleID = 'hazaard.c'
+ source_filename = "hazaard.c"
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+ target triple = "riscv64-unknown-linux-gnu"
+
+ ; Function Attrs: nounwind optsize
+ define dso_local void @test(i32 noundef signext %a, i32 noundef signext %b) local_unnamed_addr #0 {
+ entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %if.then, label %if.end3
+
+ if.then: ; preds = %entry
+ %cmp1 = icmp slt i32 %b, 1
+ br i1 %cmp1, label %if.then2, label %if.end3
+
+ if.then2: ; preds = %if.then
+ tail call void asm sideeffect "nop", ""() #1, !srcloc !4
+ ret void
+
+ if.end3: ; preds = %if.then, %entry
+ ret void
+ }
+
+ attributes #0 = { nounwind optsize "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="mips-p8700" "target-features"="+64bit,+a,+c,+d,+f,+m" }
+ attributes #1 = { nounwind }
+
+ !llvm.module.flags = !{!0, !1, !2}
+ !llvm.ident = !{!3}
+
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, !"target-abi", !"lp64d"}
+ !2 = !{i32 1, !"SmallDataLimit", i32 8}
+ !3 = !{!"clang version 14.0.0 (git at github.com:MIPS/llvm.git ae54cf4034587fab977092097c9772c7a275ddc8)"}
+ !4 = !{i64 88}
+
+...
+---
+name: test
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+failsVerification: false
+tracksDebugUserValues: true
+registers: []
+liveins:
+ - { reg: '$x10', virtual-reg: '' }
+ - { reg: '$x11', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ liveins: $x10, $x11
+
+ BGE $x0, killed renamable $x10, %bb.2
+
+ bb.1.if.then:
+ successors: %bb.3(0x30000000), %bb.2(0x50000000)
+ liveins: $x11
+
+ BGE $x0, killed renamable $x11, %bb.3
+
+ bb.2.if.end3:
+ PseudoRET
+
+ bb.3.if.then2:
+ INLINEASM &nop, 1 /* sideeffect attdialect */, !4
+ PseudoRET
+
+...
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 5ee6c192b80291..083b39b881243c 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -61,9 +61,10 @@
; CHECK-NEXT: Insert fentry calls
; CHECK-NEXT: Insert XRay ops
; CHECK-NEXT: Implement the 'patchable-function' attribute
-; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Unpack machine instruction bundles
+; CHECK-NEXT: Branch relaxation pass
+; CHECK-NEXT: RISCV Remove Back To Back Branches Pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 0dd3dbbafc336c..1f91071e7331ea 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -188,9 +188,10 @@
; CHECK-NEXT: Insert XRay ops
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Machine Copy Propagation Pass
-; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Unpack machine instruction bundles
+; CHECK-NEXT: Branch relaxation pass
+; CHECK-NEXT: RISCV Remove Back To Back Branches Pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
More information about the cfe-commits
mailing list