[llvm] [RISCV] Add software pipeliner support (PR #117546)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 20:05:21 PST 2024


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/117546

>From 415c30ef2e6f6650371ac344525baefe48892400 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Mon, 25 Nov 2024 16:54:44 +0800
Subject: [PATCH 1/3] [RISCV] Add software pipeliner support

This patch adds basic support for `MachinePipeliner` and disables
it by default.

The functionality should be OK and all llvm-test-suite tests have
passed.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp     | 81 ++++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h       |  3 +
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp     |  4 +
 llvm/lib/Target/RISCV/RISCVSubtarget.h       |  4 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp |  8 ++
 llvm/test/CodeGen/RISCV/machine-pipeliner.ll | 78 +++++++++++++++++++
 6 files changed, 178 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/machine-pipeliner.ll

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 47273d6bc06d65..659374efac955a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4213,3 +4213,84 @@ bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
     return false;
   return LHS.getImm() <= RHS.getImm();
 }
+
+namespace {
+class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+  const MachineInstr *LHS;
+  const MachineInstr *RHS;
+  SmallVector<MachineOperand, 4> Cond;
+
+public:
+  RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
+                         const SmallVectorImpl<MachineOperand> &Cond)
+      : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}
+
+  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+    // Make the instructions for loop control be placed in stage 0.
+    // The predecessors of PredBranch are considered by the caller.
+    if (LHS && MI == LHS)
+      return true;
+    if (RHS && MI == RHS)
+      return true;
+    return false;
+  }
+
+  std::optional<bool> createTripCountGreaterCondition(
+      int TC, MachineBasicBlock &MBB,
+      SmallVectorImpl<MachineOperand> &CondParam) override {
+    // A branch instruction will be inserted as "if (Cond) goto epilogue".
+    // Cond is normalized for such use.
+    // The predecessors of the branch are assumed to have already been inserted.
+    CondParam = Cond;
+    return {};
+  }
+
+  void setPreheader(MachineBasicBlock *NewPreheader) override {}
+
+  void adjustTripCount(int TripCountAdjust) override {}
+
+  void disposed() override {}
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
+  if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
+    return nullptr;
+
+  // Infinite loops are not supported
+  if (TBB == LoopBB && FBB == LoopBB)
+    return nullptr;
+
+  // Must be conditional branch
+  if (FBB == nullptr)
+    return nullptr;
+
+  assert((TBB == LoopBB || FBB == LoopBB) &&
+         "The Loop must be a single-basic-block loop");
+
+  // Normalization for createTripCountGreaterCondition()
+  if (TBB == LoopBB)
+    reverseBranchCondition(Cond);
+
+  const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+  auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
+    if (!Op.isReg())
+      return nullptr;
+    Register Reg = Op.getReg();
+    if (!Reg.isVirtual())
+      return nullptr;
+    return MRI.getVRegDef(Reg);
+  };
+
+  const MachineInstr *LHS = FindRegDef(Cond[1]);
+  const MachineInstr *RHS = FindRegDef(Cond[2]);
+  if (LHS && LHS->isPHI())
+    return nullptr;
+  if (RHS && RHS->isPHI())
+    return nullptr;
+
+  return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 005cba5d35610e..7e8bcd451a8ef8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -298,6 +298,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
 
   unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
 
+  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 38443e8646de40..c54e452ae83cb1 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -186,6 +186,10 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
 
 bool RISCVSubtarget::enableSubRegLiveness() const { return true; }
 
+bool RISCVSubtarget::enableMachinePipeliner() const {
+  return getSchedModel().hasInstrSchedModel();
+}
+
   /// Enable use of alias analysis during code generation (during MI
   /// scheduling, DAGCombine, etc.).
 bool RISCVSubtarget::useAA() const { return UseAA; }
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 043838e13b964d..eb5b64b6f5b95e 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -301,6 +301,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
 
   bool enableSubRegLiveness() const override;
 
+  bool enableMachinePipeliner() const override;
+
+  bool useDFAforSMS() const override { return false; }
+
   bool useAA() const override;
 
   unsigned getCacheLineSize() const override {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index c5847d8bfacb42..c93e0e8ea76776 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -119,6 +119,11 @@ static cl::opt<bool> DisableVectorMaskMutation(
     cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
     cl::Hidden);
 
+static cl::opt<bool>
+    EnableMachinePipeliner("riscv-enable-pipeliner",
+                           cl::desc("Enable Machine Pipeliner for RISC-V"),
+                           cl::init(false), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -611,6 +616,9 @@ void RISCVPassConfig::addPreRegAlloc() {
   addPass(createRISCVInsertReadWriteCSRPass());
   addPass(createRISCVInsertWriteVXRMPass());
   addPass(createRISCVLandingPadSetupPass());
+
+  if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
+    addPass(&MachinePipelinerID);
 }
 
 void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
new file mode 100644
index 00000000000000..bad6e8a7abc7da
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=false < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK-NOT-PIPELINED
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=true < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK-PIPELINED
+
+define void @test_1(ptr noalias %in, ptr noalias %out, i32 signext %cnt) "no-builtins" {
+; CHECK-NOT-PIPELINED-LABEL: test_1:
+; CHECK-NOT-PIPELINED:       # %bb.0: # %entry
+; CHECK-NOT-PIPELINED-NEXT:    blez a2, .LBB0_3
+; CHECK-NOT-PIPELINED-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NOT-PIPELINED-NEXT:    addi a2, a2, -1
+; CHECK-NOT-PIPELINED-NEXT:    sh2add.uw a2, a2, a1
+; CHECK-NOT-PIPELINED-NEXT:    addi a2, a2, 4
+; CHECK-NOT-PIPELINED-NEXT:  .LBB0_2: # %for.body
+; CHECK-NOT-PIPELINED-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NOT-PIPELINED-NEXT:    lw a3, 0(a1)
+; CHECK-NOT-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-NOT-PIPELINED-NEXT:    sw a3, 0(a0)
+; CHECK-NOT-PIPELINED-NEXT:    addi a0, a0, 4
+; CHECK-NOT-PIPELINED-NEXT:    bne a1, a2, .LBB0_2
+; CHECK-NOT-PIPELINED-NEXT:  .LBB0_3: # %for.end
+; CHECK-NOT-PIPELINED-NEXT:    ret
+;
+; CHECK-PIPELINED-LABEL: test_1:
+; CHECK-PIPELINED:       # %bb.0: # %entry
+; CHECK-PIPELINED-NEXT:    blez a2, .LBB0_6
+; CHECK-PIPELINED-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-PIPELINED-NEXT:    lw a3, 0(a1)
+; CHECK-PIPELINED-NEXT:    addi a2, a2, -1
+; CHECK-PIPELINED-NEXT:    addi a4, a0, 4
+; CHECK-PIPELINED-NEXT:    sh2add.uw a6, a2, a1
+; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-PIPELINED-NEXT:    addi a6, a6, 4
+; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB0_5
+; CHECK-PIPELINED-NEXT:  # %bb.2: # %for.body
+; CHECK-PIPELINED-NEXT:    lw a5, 0(a1)
+; CHECK-PIPELINED-NEXT:    addi a2, a4, 4
+; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB0_4
+; CHECK-PIPELINED-NEXT:  .LBB0_3: # %for.body
+; CHECK-PIPELINED-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-PIPELINED-NEXT:    sw a3, 0(a0)
+; CHECK-PIPELINED-NEXT:    mv a3, a5
+; CHECK-PIPELINED-NEXT:    lw a5, 0(a1)
+; CHECK-PIPELINED-NEXT:    mv a0, a4
+; CHECK-PIPELINED-NEXT:    mv a4, a2
+; CHECK-PIPELINED-NEXT:    addi a2, a2, 4
+; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-PIPELINED-NEXT:    bne a1, a6, .LBB0_3
+; CHECK-PIPELINED-NEXT:  .LBB0_4:
+; CHECK-PIPELINED-NEXT:    sw a3, 0(a0)
+; CHECK-PIPELINED-NEXT:    mv a0, a4
+; CHECK-PIPELINED-NEXT:    mv a3, a5
+; CHECK-PIPELINED-NEXT:  .LBB0_5:
+; CHECK-PIPELINED-NEXT:    sw a3, 0(a0)
+; CHECK-PIPELINED-NEXT:  .LBB0_6: # %for.end
+; CHECK-PIPELINED-NEXT:    ret
+entry:
+  %cmp5 = icmp sgt i32 %cnt, 0
+  br i1 %cmp5, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %in.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %in, %entry ]
+  %out.addr.06 = phi ptr [ %incdec.ptr1, %for.body ], [ %out, %entry ]
+  %0 = load i32, ptr %out.addr.06, align 4
+  store i32 %0, ptr %in.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds i8, ptr %in.addr.07, i64 4
+  %incdec.ptr1 = getelementptr inbounds i8, ptr %out.addr.06, i64 4
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond.not = icmp eq i32 %inc, %cnt
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+

>From ed4f43954fd51f8b08774ad8be4747e86e0cd5ca Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 2 Dec 2024 12:31:55 +0800
Subject: [PATCH 2/3] Fix comments, rework test and add test for PHI

---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp     |   2 +-
 llvm/test/CodeGen/RISCV/machine-pipeliner.ll | 111 ++++++++++++-------
 2 files changed, 72 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 659374efac955a..ac2a8edfbd125f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4227,7 +4227,7 @@ class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
 
   bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
     // Make the instructions for loop control be placed in stage 0.
-    // The predecessors of PredBranch are considered by the caller.
+    // The predecessors of LHS/RHS are considered by the caller.
     if (LHS && MI == LHS)
       return true;
     if (RHS && MI == RHS)
diff --git a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
index bad6e8a7abc7da..d2500985766874 100644
--- a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
@@ -1,78 +1,109 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=false < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK-NOT-PIPELINED
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-NOT-PIPELINED
 ; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=true < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK-PIPELINED
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-PIPELINED
 
-define void @test_1(ptr noalias %in, ptr noalias %out, i32 signext %cnt) "no-builtins" {
-; CHECK-NOT-PIPELINED-LABEL: test_1:
+; We shouldn't pipeline this loop as one operand of branch is a PHI.
+define i32 @test_phi() {
+; CHECK-LABEL: test_phi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    sh a0, 0(zero)
+; CHECK-NEXT:    bnez a1, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 0
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv1 = phi i64 [ 0, %entry ], [ 1, %for.body ]
+  store i16 1, ptr null, align 4
+  %exitcond.not.31 = icmp eq i64 %indvars.iv1, 0
+  br i1 %exitcond.not.31, label %for.cond.cleanup, label %for.body
+}
+
+define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cnt) {
+; CHECK-NOT-PIPELINED-LABEL: test_pipelined_1:
 ; CHECK-NOT-PIPELINED:       # %bb.0: # %entry
-; CHECK-NOT-PIPELINED-NEXT:    blez a2, .LBB0_3
+; CHECK-NOT-PIPELINED-NEXT:    blez a2, .LBB1_3
 ; CHECK-NOT-PIPELINED-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NOT-PIPELINED-NEXT:    addi a2, a2, -1
 ; CHECK-NOT-PIPELINED-NEXT:    sh2add.uw a2, a2, a1
 ; CHECK-NOT-PIPELINED-NEXT:    addi a2, a2, 4
-; CHECK-NOT-PIPELINED-NEXT:  .LBB0_2: # %for.body
+; CHECK-NOT-PIPELINED-NEXT:  .LBB1_2: # %for.body
 ; CHECK-NOT-PIPELINED-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NOT-PIPELINED-NEXT:    lw a3, 0(a1)
 ; CHECK-NOT-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-NOT-PIPELINED-NEXT:    addi a3, a3, 1
 ; CHECK-NOT-PIPELINED-NEXT:    sw a3, 0(a0)
 ; CHECK-NOT-PIPELINED-NEXT:    addi a0, a0, 4
-; CHECK-NOT-PIPELINED-NEXT:    bne a1, a2, .LBB0_2
-; CHECK-NOT-PIPELINED-NEXT:  .LBB0_3: # %for.end
+; CHECK-NOT-PIPELINED-NEXT:    bne a1, a2, .LBB1_2
+; CHECK-NOT-PIPELINED-NEXT:  .LBB1_3: # %for.end
 ; CHECK-NOT-PIPELINED-NEXT:    ret
 ;
-; CHECK-PIPELINED-LABEL: test_1:
+; CHECK-PIPELINED-LABEL: test_pipelined_1:
 ; CHECK-PIPELINED:       # %bb.0: # %entry
-; CHECK-PIPELINED-NEXT:    blez a2, .LBB0_6
+; CHECK-PIPELINED-NEXT:    blez a2, .LBB1_6
 ; CHECK-PIPELINED-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-PIPELINED-NEXT:    lw a3, 0(a1)
+; CHECK-PIPELINED-NEXT:    lw a4, 0(a1)
 ; CHECK-PIPELINED-NEXT:    addi a2, a2, -1
-; CHECK-PIPELINED-NEXT:    addi a4, a0, 4
 ; CHECK-PIPELINED-NEXT:    sh2add.uw a6, a2, a1
+; CHECK-PIPELINED-NEXT:    addi a2, a0, 4
 ; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
 ; CHECK-PIPELINED-NEXT:    addi a6, a6, 4
-; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB0_5
+; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB1_5
 ; CHECK-PIPELINED-NEXT:  # %bb.2: # %for.body
 ; CHECK-PIPELINED-NEXT:    lw a5, 0(a1)
-; CHECK-PIPELINED-NEXT:    addi a2, a4, 4
+; CHECK-PIPELINED-NEXT:    addi a3, a2, 4
+; CHECK-PIPELINED-NEXT:    addi a4, a4, 1
 ; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
-; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB0_4
-; CHECK-PIPELINED-NEXT:  .LBB0_3: # %for.body
+; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB1_4
+; CHECK-PIPELINED-NEXT:  .LBB1_3: # %for.body
 ; CHECK-PIPELINED-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-PIPELINED-NEXT:    sw a3, 0(a0)
-; CHECK-PIPELINED-NEXT:    mv a3, a5
+; CHECK-PIPELINED-NEXT:    sw a4, 0(a0)
+; CHECK-PIPELINED-NEXT:    mv a4, a5
 ; CHECK-PIPELINED-NEXT:    lw a5, 0(a1)
-; CHECK-PIPELINED-NEXT:    mv a0, a4
-; CHECK-PIPELINED-NEXT:    mv a4, a2
-; CHECK-PIPELINED-NEXT:    addi a2, a2, 4
+; CHECK-PIPELINED-NEXT:    mv a0, a2
+; CHECK-PIPELINED-NEXT:    mv a2, a3
+; CHECK-PIPELINED-NEXT:    addi a3, a3, 4
+; CHECK-PIPELINED-NEXT:    addi a4, a4, 1
 ; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
-; CHECK-PIPELINED-NEXT:    bne a1, a6, .LBB0_3
-; CHECK-PIPELINED-NEXT:  .LBB0_4:
-; CHECK-PIPELINED-NEXT:    sw a3, 0(a0)
-; CHECK-PIPELINED-NEXT:    mv a0, a4
-; CHECK-PIPELINED-NEXT:    mv a3, a5
-; CHECK-PIPELINED-NEXT:  .LBB0_5:
-; CHECK-PIPELINED-NEXT:    sw a3, 0(a0)
-; CHECK-PIPELINED-NEXT:  .LBB0_6: # %for.end
+; CHECK-PIPELINED-NEXT:    bne a1, a6, .LBB1_3
+; CHECK-PIPELINED-NEXT:  .LBB1_4:
+; CHECK-PIPELINED-NEXT:    sw a4, 0(a0)
+; CHECK-PIPELINED-NEXT:    mv a0, a2
+; CHECK-PIPELINED-NEXT:    mv a4, a5
+; CHECK-PIPELINED-NEXT:  .LBB1_5:
+; CHECK-PIPELINED-NEXT:    addi a4, a4, 1
+; CHECK-PIPELINED-NEXT:    sw a4, 0(a0)
+; CHECK-PIPELINED-NEXT:  .LBB1_6: # %for.end
 ; CHECK-PIPELINED-NEXT:    ret
 entry:
-  %cmp5 = icmp sgt i32 %cnt, 0
-  br i1 %cmp5, label %for.body, label %for.end
+  %cmp = icmp sgt i32 %cnt, 0
+  br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %entry, %for.body
-  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %in.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %in, %entry ]
-  %out.addr.06 = phi ptr [ %incdec.ptr1, %for.body ], [ %out, %entry ]
-  %0 = load i32, ptr %out.addr.06, align 4
-  store i32 %0, ptr %in.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds i8, ptr %in.addr.07, i64 4
-  %incdec.ptr1 = getelementptr inbounds i8, ptr %out.addr.06, i64 4
-  %inc = add nuw nsw i32 %i.08, 1
+  %inc.next = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %in.addr.next = phi ptr [ %incdec.in, %for.body ], [ %in, %entry ]
+  %out.addr.next = phi ptr [ %incdec.out, %for.body ], [ %out, %entry ]
+  %0 = load i32, ptr %out.addr.next, align 4
+  %1 = add i32 %0, 1
+  store i32 %1, ptr %in.addr.next, align 4
+  %incdec.in = getelementptr inbounds i8, ptr %in.addr.next, i64 4
+  %incdec.out = getelementptr inbounds i8, ptr %out.addr.next, i64 4
+  %inc = add nuw nsw i32 %inc.next, 1
   %exitcond.not = icmp eq i32 %inc, %cnt
   br i1 %exitcond.not, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
-

>From fab23b51ce31e0b603245ebcdee95813edf8b548 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 4 Dec 2024 12:03:38 +0800
Subject: [PATCH 3/3] Use SmallVector<MachineOperand, 3>

---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ac2a8edfbd125f..7c9276725a81b0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4218,7 +4218,7 @@ namespace {
 class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
   const MachineInstr *LHS;
   const MachineInstr *RHS;
-  SmallVector<MachineOperand, 4> Cond;
+  SmallVector<MachineOperand, 3> Cond;
 
 public:
   RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,



More information about the llvm-commits mailing list