[llvm] 1a8468b - [RISCV] Add a RISCV specific CodeGenPrepare pass.

Thu Jul 14 10:21:09 PDT 2022

Author: Craig Topper
Date: 2022-07-14T10:20:59-07:00
New Revision: 1a8468ba6114719962442292ad4c1037bdb6256a

URL: https://github.com/llvm/llvm-project/commit/1a8468ba6114719962442292ad4c1037bdb6256a
DIFF: https://github.com/llvm/llvm-project/commit/1a8468ba6114719962442292ad4c1037bdb6256a.diff

LOG: [RISCV] Add a RISCV specific CodeGenPrepare pass.

Initial optimization is to convert (i64 (zext (i32 X))) to
(i64 (sext (i32 X))) if the dominating condition for the basic block
guaranteed the sign bit of X is zero.

This frequently occurs in loop preheaders where a signed induction
variable that can never be negative has been widened. There will be
a dominating check that the 32-bit trip count isn't negative or zero.
The check here is not restricted to that specific case though.

A i32->i64 sext is cheaper than zext on RV64 without the Zba
extension. Later optimizations can often remove the sext from the
preheader basic block because the dominating block also needs a sext to
evaluate the greater than 0 check.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D129732

Added: 
    llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
    llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
    llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll

Modified: 
    llvm/lib/Target/RISCV/CMakeLists.txt
    llvm/lib/Target/RISCV/RISCV.h
    llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
    llvm/test/CodeGen/RISCV/O3-pipeline.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 774152cf9e41..3e4c3c62a34d 100644

--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -21,6 +21,7 @@ add_public_tablegen_target(RISCVCommonTableGen)
 add_llvm_target(RISCVCodeGen
   RISCVAsmPrinter.cpp
   RISCVCallLowering.cpp
+  RISCVCodeGenPrepare.cpp
   RISCVMakeCompressible.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp

diff  --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 917837a307ad..e6140edc8403 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -30,6 +30,9 @@ class MachineInstr;
 class MachineOperand;
 class PassRegistry;
 
+FunctionPass *createRISCVCodeGenPreparePass();
+void initializeRISCVCodeGenPreparePass(PassRegistry &);
+
 bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP);
 bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,

diff  --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
new file mode 100644
index 000000000000..4a3a7786e5e7
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -0,0 +1,116 @@
+//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a RISCV specific version of CodeGenPrepare.
+// It munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in it's
+// basic-block-at-a-time approach.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-codegenprepare"
+#define PASS_NAME "RISCV CodeGenPrepare"
+
+STATISTIC(NumZExtToSExt, "Number of SExt instructions converted to ZExt");
+
+namespace {
+
+class RISCVCodeGenPrepare : public FunctionPass {
+  const DataLayout *DL;
+  const RISCVSubtarget *ST;
+
+public:
+  static char ID;
+
+  RISCVCodeGenPrepare() : FunctionPass(ID) {}
+
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override { return PASS_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<TargetPassConfig>();
+  }
+
+private:
+  bool optimizeZExt(ZExtInst *I);
+};
+
+} // end anonymous namespace
+
+bool RISCVCodeGenPrepare::optimizeZExt(ZExtInst *ZExt) {
+  if (!ST->is64Bit())
+    return false;
+
+  Value *Src = ZExt->getOperand(0);
+
+  // We only care about ZExt from i32 to i64.
+  if (!ZExt->getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32))
+    return false;
+
+  // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we
+  // can determine that the sign bit of X is zero via a dominating condition.
+  // This often occurs with widened induction variables.
+  if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
+                              Constant::getNullValue(Src->getType()), ZExt,
+                              *DL)) {
+    IRBuilder<> Builder(ZExt);
+    Value *SExt = Builder.CreateSExt(Src, ZExt->getType());
+    SExt->takeName(ZExt);
+
+    ZExt->replaceAllUsesWith(SExt);
+    ZExt->eraseFromParent();
+    ++NumZExtToSExt;
+    return true;
+  }
+
+  return false;
+}
+
+bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  auto &TPC = getAnalysis<TargetPassConfig>();
+  auto &TM = TPC.getTM<RISCVTargetMachine>();
+  ST = &TM.getSubtarget<RISCVSubtarget>(F);
+
+  DL = &F.getParent()->getDataLayout();
+
+  bool MadeChange = false;
+  for (auto &BB : F) {
+    for (Instruction &I : llvm::make_early_inc_range(BB)) {
+      if (auto *ZExt = dyn_cast<ZExtInst>(&I))
+        MadeChange |= optimizeZExt(ZExt);
+    }
+  }
+
+  return MadeChange;
+}
+
+INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+
+char RISCVCodeGenPrepare::ID = 0;
+
+FunctionPass *llvm::createRISCVCodeGenPreparePass() {
+  return new RISCVCodeGenPrepare();
+}

diff  --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b2707b753e87..f30f67657f9b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -49,6 +49,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeGlobalISel(*PR);
   initializeRISCVMakeCompressibleOptPass(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
+  initializeRISCVCodeGenPreparePass(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
   initializeRISCVSExtWRemovalPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
@@ -165,6 +166,7 @@ class RISCVPassConfig : public TargetPassConfig {
   }
 
   void addIRPasses() override;
+  void addCodeGenPrepare() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
@@ -192,6 +194,12 @@ void RISCVPassConfig::addIRPasses() {
   TargetPassConfig::addIRPasses();
 }
 
+void RISCVPassConfig::addCodeGenPrepare() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createRISCVCodeGenPreparePass());
+  TargetPassConfig::addCodeGenPrepare();
+}
+
 bool RISCVPassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None) {
     // Add a barrier before instruction selection so that we will not get

diff  --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index d0bbe886f679..5782312fbb9f 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -57,6 +57,7 @@
 ; CHECK-NEXT:       Expand reduction intrinsics
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       TLS Variable Hoist
+; CHECK-NEXT:       RISCV CodeGenPrepare
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Exception handling preparation

diff  --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
new file mode 100644
index 000000000000..9bef8e024c67
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+
+; Make sure we don't emit a pair of shift for the zext in the preheader. We
+; can tell that bit 31 is 0 in the preheader and rely on %n already being
+; sign extended without adding zeros explicitly.
+define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    blez a1, .LBB0_2
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    lw a2, 0(a0)
+; CHECK-NEXT:    addiw a2, a2, 4
+; CHECK-NEXT:    sw a2, 0(a0)
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    addi a0, a0, 4
+; CHECK-NEXT:    bnez a1, .LBB0_1
+; CHECK-NEXT:  .LBB0_2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}

diff  --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll
new file mode 100644
index 000000000000..2b33ccac8649
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -riscv-codegenprepare -mtriple=riscv64 | FileCheck %s
+
+; Test that we can convert the %wide.trip.count zext to a sext. The dominating
+; condition %cmp3 ruled out %n being negative.
+define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV5:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ], [ [[UGLYGEP:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[LSR_IV]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[LSR_IV]], align 4
+; CHECK-NEXT:    [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV5]], -1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+;
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %lsr.iv5 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+  %lsr.iv = phi ptr [ %a, %for.body.preheader ], [ %uglygep, %for.body ]
+  %0 = load i32, ptr %lsr.iv, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, ptr %lsr.iv, align 4
+  %uglygep = getelementptr i8, ptr %lsr.iv, i64 4
+  %lsr.iv.next = add nsw i64 %lsr.iv5, -1
+  %exitcond.not = icmp eq i64 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}