[llvm] 6aa9e74 - [AVR] Expand large shifts early in IR
Ayke van Laethem via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 24 05:03:42 PDT 2021
Author: Ayke van Laethem
Date: 2021-07-24T14:03:26+02:00
New Revision: 6aa9e746ebde6efd2479fa833d2d816d0a7bb353
URL: https://github.com/llvm/llvm-project/commit/6aa9e746ebde6efd2479fa833d2d816d0a7bb353
DIFF: https://github.com/llvm/llvm-project/commit/6aa9e746ebde6efd2479fa833d2d816d0a7bb353.diff
LOG: [AVR] Expand large shifts early in IR
This patch makes sure shift instructions such as this one:
%result = shl i32 %n, %amount
are expanded just before the IR to SelectionDAG conversion to a loop so
that calls to non-existing library functions such as __ashlsi3 are
avoided. The generated code is currently pretty bad but there's a lot of
room for improvement: the shift itself can be done in just four
instructions.
Differential Revision: https://reviews.llvm.org/D96677
Added:
llvm/lib/Target/AVR/AVRShiftExpand.cpp
llvm/test/CodeGen/AVR/shift-expand.ll
Modified:
llvm/lib/Target/AVR/AVR.h
llvm/lib/Target/AVR/AVRTargetMachine.cpp
llvm/lib/Target/AVR/CMakeLists.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
index f0746d73c95f..7332307c07a3 100644
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -22,6 +22,7 @@ namespace llvm {
class AVRTargetMachine;
class FunctionPass;
+Pass *createAVRShiftExpandPass();
FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAVRExpandPseudoPass();
@@ -30,6 +31,7 @@ FunctionPass *createAVRRelaxMemPass();
FunctionPass *createAVRDynAllocaSRPass();
FunctionPass *createAVRBranchSelectionPass();
+void initializeAVRShiftExpandPass(PassRegistry &);
void initializeAVRExpandPseudoPass(PassRegistry&);
void initializeAVRRelaxMemPass(PassRegistry&);
diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
new file mode 100644
index 000000000000..b7dcd860467d
--- /dev/null
+++ b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
@@ -0,0 +1,147 @@
+//===- AVRShift.cpp - Shift Expansion Pass --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
+/// like avr-gcc. This must be done in IR because otherwise the type legalizer
+/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+
+using namespace llvm;
+
+namespace {
+
+class AVRShiftExpand : public FunctionPass {
+public:
+ static char ID;
+
+ AVRShiftExpand() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return "AVR Shift Expansion"; }
+
+private:
+ void expand(BinaryOperator *BI);
+};
+
+} // end of anonymous namespace
+
+char AVRShiftExpand::ID = 0;
+
+INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion",
+ false, false)
+
+Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); }
+
+bool AVRShiftExpand::runOnFunction(Function &F) {
+ SmallVector<BinaryOperator *, 1> ShiftInsts;
+ auto &Ctx = F.getContext();
+ for (Instruction &I : instructions(F)) {
+ if (!I.isShift())
+ // Only expand shift instructions (shl, lshr, ashr).
+ continue;
+ if (I.getType() != Type::getInt32Ty(Ctx))
+ // Only expand plain i32 types.
+ continue;
+ if (isa<ConstantInt>(I.getOperand(1)))
+ // Only expand when the shift amount is not known.
+ // Known shift amounts are (currently) better expanded inline.
+ continue;
+ ShiftInsts.push_back(cast<BinaryOperator>(&I));
+ }
+
+ // The expanding itself needs to be done separately as expand() will remove
+ // these instructions. Removing instructions while iterating over a basic
+ // block is not a great idea.
+ for (auto *I : ShiftInsts) {
+ expand(I);
+ }
+
+ // Return whether this function expanded any shift instructions.
+ return ShiftInsts.size() > 0;
+}
+
+void AVRShiftExpand::expand(BinaryOperator *BI) {
+ auto &Ctx = BI->getContext();
+ IRBuilder<> Builder(BI);
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+ Type *Int8Ty = Type::getInt8Ty(Ctx);
+ Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
+
+ // Split the current basic block at the point of the existing shift
+ // instruction and insert a new basic block for the loop.
+ BasicBlock *BB = BI->getParent();
+ Function *F = BB->getParent();
+ BasicBlock *EndBB = BB->splitBasicBlock(BI, "shift.done");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "shift.loop", F, EndBB);
+
+ // Truncate the shift amount to i8, which is trivially lowered to a single
+ // AVR register.
+ Builder.SetInsertPoint(&BB->back());
+ Value *ShiftAmount = Builder.CreateTrunc(BI->getOperand(1), Int8Ty);
+
+ // Replace the unconditional branch that splitBasicBlock created with a
+ // conditional branch.
+ Value *Cmp1 = Builder.CreateICmpEQ(ShiftAmount, Int8Zero);
+ Builder.CreateCondBr(Cmp1, EndBB, LoopBB);
+ BB->back().eraseFromParent();
+
+ // Create the loop body starting with PHI nodes.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
+ ShiftAmountPHI->addIncoming(ShiftAmount, BB);
+ PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
+ ValuePHI->addIncoming(BI->getOperand(0), BB);
+
+ // Subtract the shift amount by one, as we're shifting one this loop
+ // iteration.
+ Value *ShiftAmountSub =
+ Builder.CreateSub(ShiftAmountPHI, ConstantInt::get(Int8Ty, 1));
+ ShiftAmountPHI->addIncoming(ShiftAmountSub, LoopBB);
+
+ // Emit the actual shift instruction. The
diff erence is that this shift
+ // instruction has a constant shift amount, which can be emitted inline
+ // without a library call.
+ Value *ValueShifted;
+ switch (BI->getOpcode()) {
+ case Instruction::Shl:
+ ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
+ break;
+ case Instruction::LShr:
+ ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+ break;
+ case Instruction::AShr:
+ ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+ break;
+ default:
+ llvm_unreachable("asked to expand an instruction that is not a shift");
+ }
+ ValuePHI->addIncoming(ValueShifted, LoopBB);
+
+ // Branch to either the loop again (if there is more to shift) or to the
+ // basic block after the loop (if all bits are shifted).
+ Value *Cmp2 = Builder.CreateICmpEQ(ShiftAmountSub, Int8Zero);
+ Builder.CreateCondBr(Cmp2, EndBB, LoopBB);
+
+ // Collect the resulting value. This is necessary in the IR but won't produce
+ // any actual instructions.
+ Builder.SetInsertPoint(BI);
+ PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
+ Result->addIncoming(BI->getOperand(0), BB);
+ Result->addIncoming(ValueShifted, LoopBB);
+
+ // Replace the original shift instruction.
+ BI->replaceAllUsesWith(Result);
+ BI->eraseFromParent();
+}
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 0fa8623e2fb7..5be4260ce035 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -65,6 +65,7 @@ class AVRPassConfig : public TargetPassConfig {
return getTM<AVRTargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -76,6 +77,15 @@ TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) {
return new AVRPassConfig(*this, PM);
}
+void AVRPassConfig::addIRPasses() {
+ // Expand instructions like
+ // %result = shl i32 %n, %amount
+ // to a loop so that library calls are avoided.
+ addPass(createAVRShiftExpandPass());
+
+ TargetPassConfig::addIRPasses();
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
// Register the target.
RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget());
@@ -83,6 +93,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
auto &PR = *PassRegistry::getPassRegistry();
initializeAVRExpandPseudoPass(PR);
initializeAVRRelaxMemPass(PR);
+ initializeAVRShiftExpandPass(PR);
}
const AVRSubtarget *AVRTargetMachine::getSubtargetImpl() const {
diff --git a/llvm/lib/Target/AVR/CMakeLists.txt b/llvm/lib/Target/AVR/CMakeLists.txt
index f24bbd32562c..cde2138048dd 100644
--- a/llvm/lib/Target/AVR/CMakeLists.txt
+++ b/llvm/lib/Target/AVR/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_target(AVRCodeGen
AVRMCInstLower.cpp
AVRRelaxMemOperations.cpp
AVRRegisterInfo.cpp
+ AVRShiftExpand.cpp
AVRSubtarget.cpp
AVRTargetMachine.cpp
AVRTargetObjectFile.cpp
diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll
new file mode 100644
index 000000000000..7baba06586a7
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/shift-expand.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s
+
+; The avr-shift-expand pass expands large shifts with a non-constant shift
+; amount to a loop. These loops avoid generating a (non-existing) builtin such
+; as __ashlsi3.
+
+target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
+target triple = "avr"
+
+define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK: shift.loop:
+; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT: [[TMP6]] = shl i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK: shift.done:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: ret i32 [[TMP8]]
+;
+ %result = shl i32 %value, %amount
+ ret i32 %result
+}
+
+define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
+; CHECK-LABEL: @lshr(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK: shift.loop:
+; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT: [[TMP6]] = lshr i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK: shift.done:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: ret i32 [[TMP8]]
+;
+ %result = lshr i32 %value, %amount
+ ret i32 %result
+}
+
+define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK: shift.loop:
+; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK: shift.done:
+; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT: ret i32 [[TMP10]]
+;
+ %3 = ashr i32 %0, %1
+ ret i32 %3
+}
+
+; This function is not modified because it is not an i32.
+define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
+; CHECK-LABEL: @shl40(
+; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
+; CHECK-NEXT: ret i40 [[RESULT]]
+;
+ %result = shl i40 %value, %amount
+ ret i40 %result
+}
+
+; This function isn't either, although perhaps it should.
+define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
+; CHECK-LABEL: @shl24(
+; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
+; CHECK-NEXT: ret i24 [[RESULT]]
+;
+ %result = shl i24 %value, %amount
+ ret i24 %result
+}
More information about the llvm-commits
mailing list