[llvm] r363370 - [ARM] MVE VPT Block Pass

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 14 04:46:05 PDT 2019


Author: sjoerdmeijer
Date: Fri Jun 14 04:46:05 2019
New Revision: 363370

URL: http://llvm.org/viewvc/llvm-project?rev=363370&view=rev
Log:
[ARM] MVE VPT Block Pass

Initial commit of a new pass to create vector predication blocks, called VPT
blocks, that are supported by the Armv8.1-M MVE architecture.

This is a first, naive implementation. For example, for 2 consecutive
predicated instructions I1 and I2, it will generate 2 VPT blocks:

VPST
I1
VPST
I2

A more optimal implementation would obviously put instructions in the same VPT
block when they are predicated on the same condition and when it is allowed to
do this:

VPTT
I1
I2

We will address this optimisation with follow-up patches when the groundwork is
in. Creating VPT blocks is very similar to creating IT blocks, which is the
reason I added this to Thumb2ITBlockPass.cpp. This allows reuse of the def-use
analysis that we need for the more optimal implementation.

VPT blocks cannot be nested in IT blocks, and vice versa, and so these 2 passes
cannot interact with each other. Instructions allowed in VPT blocks must
be MVE instructions that are marked as VPT compatible.

Differential Revision: https://reviews.llvm.org/D63247

Added:
    llvm/trunk/test/CodeGen/ARM/mve-vpt-block.mir
Modified:
    llvm/trunk/lib/Target/ARM/ARM.h
    llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
    llvm/trunk/lib/Target/ARM/Thumb2ITBlockPass.cpp
    llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp
    llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.h
    llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll

Modified: llvm/trunk/lib/Target/ARM/ARM.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.h?rev=363370&r1=363369&r2=363370&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.h (original)
+++ llvm/trunk/lib/Target/ARM/ARM.h Fri Jun 14 04:46:05 2019
@@ -46,6 +46,7 @@ FunctionPass *createARMCodeGenPreparePas
 FunctionPass *createARMConstantIslandPass();
 FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createMVEVPTBlockPass();
 FunctionPass *createARMOptimizeBarriersPass();
 FunctionPass *createThumb2SizeReductionPass(
     std::function<bool(const Function &)> Ftor = nullptr);
@@ -68,6 +69,7 @@ void initializeARMCodeGenPreparePass(Pas
 void initializeARMConstantIslandsPass(PassRegistry &);
 void initializeARMExpandPseudoPass(PassRegistry &);
 void initializeThumb2SizeReducePass(PassRegistry &);
+void initializeMVEVPTBlockPass(PassRegistry &);
 
 } // end namespace llvm
 

Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=363370&r1=363369&r2=363370&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Fri Jun 14 04:46:05 2019
@@ -95,6 +95,7 @@ extern "C" void LLVMInitializeARMTarget(
   initializeARMExecutionDomainFixPass(Registry);
   initializeARMExpandPseudoPass(Registry);
   initializeThumb2SizeReducePass(Registry);
+  initializeMVEVPTBlockPass(Registry);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -508,6 +509,7 @@ void ARMPassConfig::addPreSched2() {
       return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
     }));
   }
+  addPass(createMVEVPTBlockPass());
   addPass(createThumb2ITBlockPass());
 }
 

Modified: llvm/trunk/lib/Target/ARM/Thumb2ITBlockPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb2ITBlockPass.cpp?rev=363370&r1=363369&r2=363370&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb2ITBlockPass.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb2ITBlockPass.cpp Fri Jun 14 04:46:05 2019
@@ -316,3 +316,123 @@ bool Thumb2ITBlockPass::runOnMachineFunc
 FunctionPass *llvm::createThumb2ITBlockPass() {
   return new Thumb2ITBlockPass();
 }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "arm-mve-vpt"
+
+namespace {
+  class MVEVPTBlock : public MachineFunctionPass {
+  public:
+    static char ID;
+    const Thumb2InstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+
+    MVEVPTBlock() : MachineFunctionPass(ID) {}
+
+    bool runOnMachineFunction(MachineFunction &Fn) override;
+
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    StringRef getPassName() const override {
+      return "MVE VPT block insertion pass";
+    }
+
+  private:
+    bool InsertVPTBlocks(MachineBasicBlock &MBB);
+  };
+
+  char MVEVPTBlock::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
+
+enum VPTMaskValue {
+  T     =  8, // 0b1000
+  TT    =  4, // 0b0100
+  TE    = 12, // 0b1100
+  TTT   =  2, // 0b0010
+  TTE   =  6, // 0b0110
+  TEE   = 10, // 0b1010
+  TET   = 14, // 0b1110
+  TTTT  =  1, // 0b0001
+  TTTE  =  3, // 0b0011
+  TTEE  =  5, // 0b0101
+  TTET  =  7, // 0b0111
+  TEEE  =  9, // 0b1001
+  TEET  = 11, // 0b1011
+  TETT  = 13, // 0b1101
+  TETE  = 15  // 0b1111
+};
+
+bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
+  bool Modified = false;
+  MachineBasicBlock::iterator MBIter = Block.begin();
+  MachineBasicBlock::iterator EndIter = Block.end();
+
+  while (MBIter != EndIter) {
+    MachineInstr *MI = &*MBIter;
+    unsigned PredReg = 0;
+    DebugLoc dl = MI->getDebugLoc();
+
+    ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
+
+    // The idea of the predicate is that None, Then and Else are for use when
+    // handling assembly language: they correspond to the three possible
+    // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
+    // from assembly source or disassembled from object code, you expect to see
+    // a mixture whenever there's a long VPT block. But in code generation, we
+    // hope we'll never generate an Else as input to this pass.
+
+    assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
+
+    if (Pred == ARMVCC::None) {
+      ++MBIter;
+      continue;
+    }
+
+    MachineInstrBuilder MIBuilder =
+        BuildMI(Block, MBIter, dl, TII->get(ARM::t2VPST));
+    MachineInstr *LastITMI = MI;
+    MachineBasicBlock::iterator InsertPos = MIBuilder.getInstr();
+
+    // The mask value for the VPST instruction is T = 0b1000 = 8
+    MIBuilder.addImm(VPTMaskValue::T);
+
+    finalizeBundle(Block, InsertPos.getInstrIterator(),
+                     ++LastITMI->getIterator());
+    Modified = true;
+    LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump(););
+
+    ++MBIter;
+  }
+  return Modified;
+}
+
+bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
+  const ARMSubtarget &STI =
+      static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+
+  if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+    return false;
+
+  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+  TRI = STI.getRegisterInfo();
+
+  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
+                    << "********** Function: " << Fn.getName() << '\n');
+
+  bool Modified = false;
+  for (MachineBasicBlock &MBB : Fn)
+    Modified |= InsertVPTBlocks(MBB);
+
+  LLVM_DEBUG(dbgs() << "**************************************\n");
+  return Modified;
+}
+
+/// createMVEVPTBlockPass - Returns an instance of the MVE VPT block
+/// insertion pass.
+FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }

Modified: llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp?rev=363370&r1=363369&r2=363370&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp Fri Jun 14 04:46:05 2019
@@ -685,3 +685,28 @@ ARMCC::CondCodes llvm::getITInstrPredica
     return ARMCC::AL;
   return getInstrPredicate(MI, PredReg);
 }
+
+int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+
+  if (!MCID.OpInfo)
+    return -1;
+
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+    if (ARM::isVpred(MCID.OpInfo[i].OperandType))
+      return i;
+
+  return -1;
+}
+
+ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
+                                            unsigned &PredReg) {
+  int PIdx = findFirstVPTPredOperandIdx(MI);
+  if (PIdx == -1) {
+    PredReg = 0;
+    return ARMVCC::None;
+  }
+
+  PredReg = MI.getOperand(PIdx+1).getReg();
+  return (ARMVCC::VPTCodes)MI.getOperand(PIdx).getImm();
+}

Modified: llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.h?rev=363370&r1=363369&r2=363370&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.h Fri Jun 14 04:46:05 2019
@@ -68,6 +68,12 @@ private:
 /// to llvm::getInstrPredicate except it returns AL for conditional branch
 /// instructions which are "predicated", but are not in IT blocks.
 ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
+
+// getVPTInstrPredicate: VPT analogue of getITInstrPredicate, plus a helper
+// function corresponding to MachineInstr::findFirstPredOperandIdx.
+int findFirstVPTPredOperandIdx(const MachineInstr &MI);
+ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI,
+                                      unsigned &PredReg);
 }
 
 #endif

Modified: llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll?rev=363370&r1=363369&r2=363370&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll Fri Jun 14 04:46:05 2019
@@ -125,6 +125,7 @@
 ; CHECK-NEXT:      Machine Natural Loop Construction
 ; CHECK-NEXT:      Machine Block Frequency Analysis
 ; CHECK-NEXT:      If Converter
+; CHECK-NEXT:      MVE VPT block insertion pass
 ; CHECK-NEXT:      Thumb IT blocks insertion pass
 ; CHECK-NEXT:      MachineDominator Tree Construction
 ; CHECK-NEXT:      Machine Natural Loop Construction

Added: llvm/trunk/test/CodeGen/ARM/mve-vpt-block.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/mve-vpt-block.mir?rev=363370&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/mve-vpt-block.mir (added)
+++ llvm/trunk/test/CodeGen/ARM/mve-vpt-block.mir Fri Jun 14 04:46:05 2019
@@ -0,0 +1,71 @@
+# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-arm-none-eabi"
+
+  define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {
+  entry:
+    %conv.i = zext i16 %p to i32
+    %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2
+    ret <4 x float> %0
+  }
+
+  declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1
+
+  attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { nounwind readnone }
+  attributes #2 = { nounwind }
+
+
+...
+---
+name:            test_vminnmq_m_f32_v2
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:
+  - { reg: '$q0', virtual-reg: '' }
+  - { reg: '$q1', virtual-reg: '' }
+  - { reg: '$q2', virtual-reg: '' }
+  - { reg: '$r0', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+body:             |
+  bb.0.entry:
+    liveins: $q0, $q1, $q2, $r0
+
+    ; CHECK:       VPST 8, implicit-def $p0
+    ; CHECK-NEXT:  $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0
+
+    $vpr = VMSR_P0 killed $r0, 14, $noreg
+    renamable $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0
+    tBX_RET 14, $noreg, implicit $q0
+
+...




More information about the llvm-commits mailing list