[llvm] r190043 - ARM: Add GPR register class excluding LR for use with the ADR instruction.

Tilmann Scheller tilmann.scheller at googlemail.com
Thu Sep 5 04:10:31 PDT 2013

Author: tilmann
Date: Thu Sep  5 06:10:31 2013
New Revision: 190043

URL: http://llvm.org/viewvc/llvm-project?rev=190043&view=rev
ARM: Add GPR register class excluding LR for use with the ADR instruction.

This improves code generation for jump tables by avoiding the emission of "mov pc, lr", which could fool the processor into believing this is a return from a function, causing branch mispredictions. The code generation logic for jump tables uses ADR to materialize the address of the jump target.

Patch by Daniel Stewart!


Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=190043&r1=190042&r2=190043&view=diff
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Thu Sep  5 06:10:31 2013
@@ -1233,7 +1233,7 @@ let neverHasSideEffects = 1, isReMateria
 def t2LEApcrel   : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
                                 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
 let hasSideEffects = 1 in
-def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+def t2LEApcrelJT : t2PseudoInst<(outs jtGPR:$Rd),
                                 (ins i32imm:$label, nohash_imm:$id, pred:$p),
                                 4, IIC_iALUi,
                                 []>, Sched<[WriteALU, ReadALU]>;

Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td?rev=190043&r1=190042&r2=190043&view=diff
--- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Thu Sep  5 06:10:31 2013
@@ -240,6 +240,14 @@ def rGPR : RegisterClass<"ARM", [i32], 3
+// jtGPR - Jump Table General Purpose Registers.
+// Used by the Thumb2 instructions to prevent Thumb2 jump tables
+// from using the LR. The implementation of the jump table uses a mov pc, rA
+// type instruction to jump into the table. Use of the LR register (as in
+// mov pc, lr) can cause the ARM branch predictor to think it is returning
+// from a function instead. This causes a mispredict and a pipe flush. 
+def jtGPR : RegisterClass<"ARM", [i32], 32, (sub rGPR, LR)>;
 // Thumb registers are R0-R7 normally. Some instructions can still use
 // the general GPR register class above (MOV, e.g.)
 def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;

Modified: llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp?rev=190043&r1=190042&r2=190043&view=diff
--- llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb2InstrInfo.cpp Thu Sep  5 06:10:31 2013
@@ -152,7 +152,13 @@ storeRegToStackSlot(MachineBasicBlock &M
     // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
     // otherwise).
     MachineRegisterInfo *MRI = &MF.getRegInfo();
-    MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+    const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC,
+                                &ARM::rGPRRegClass,
+                                ARM::gsub_1);
+    assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass");
+    const TargetRegisterClass* ConstrainedClass =
+                                MRI->constrainRegClass(SrcReg, TargetClass);
+    assert(ConstrainedClass && "Couldn't constrain the register class");
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
     AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
@@ -193,7 +199,13 @@ loadRegFromStackSlot(MachineBasicBlock &
     // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
     // otherwise).
     MachineRegisterInfo *MRI = &MF.getRegInfo();
-    MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+    const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC,
+                                &ARM::rGPRRegClass,
+                                ARM::gsub_1);
+    assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass");
+    const TargetRegisterClass* ConstrainedClass =
+                                MRI->constrainRegClass(DestReg, TargetClass);
+    assert(ConstrainedClass && "Couldn't constrain the register class");
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
     AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);

Added: llvm/trunk/test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll?rev=190043&view=auto
--- llvm/trunk/test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/2013-09-04-Thumb2JumpTable.ll Thu Sep  5 06:10:31 2013
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 |  FileCheck %s
+define i32 @foo(i32 %n, i32* nocapture %inp) #0 {
+; CHECK: foo
+; CHECK-NOT: mov pc, lr
+  %0 = icmp sgt i32 %n, 1
+  %1 = add nsw i32 %n, -1
+  %loop_guard = icmp sgt i32 %1, 0
+  %or.cond = and i1 %0, %loop_guard
+  br i1 %or.cond, label %stmt.preheader, label %loop_exit
+stmt.preheader:                            ; preds = %.split
+  %adjust_ub = add i32 %n, -2
+  %scevgep6.gep = getelementptr i32* %inp, i32 1
+  %2 = icmp sgt i32 %adjust_ub, 0
+  %adjust_ub.op = add i32 %n, -1
+  %3 = select i1 %2, i32 %adjust_ub.op, i32 1
+  %xtraiter = and i32 %3, 3
+  switch i32 %xtraiter, label %stmt.unr [
+    i32 0, label %stmt.
+    i32 1, label %stmt.unr30
+    i32 2, label %stmt.unr16
+    i32 3, label %stmt.unr8
+  ]
+stmt.unr:                                  ; preds = %stmt.preheader
+  %scevgep6.inc.unr = getelementptr i32* %inp, i32 2
+  br label %stmt.unr8
+stmt.unr8:                                 ; preds = %stmt.preheader, %stmt.unr
+  %imax.03.reg2mem.0.unr = phi i32 [ 1, %stmt.unr ], [ 0, %stmt.preheader ]
+  %scevgep6.phi.unr = phi i32* [ %scevgep6.inc.unr, %stmt.unr ], [ %scevgep6.gep, %stmt.preheader ]
+  %scevgep6.inc.unr15 = getelementptr i32* %scevgep6.phi.unr, i32 1
+  br label %stmt.unr16
+stmt.unr16:                                ; preds = %stmt.preheader, %stmt.unr8
+  %imax.03.reg2mem.0.unr17 = phi i32 [ 0, %stmt.unr8 ], [ 0, %stmt.preheader ]
+  %selv.lcssa.reg2mem.1.unr18 = phi i32 [ 0, %stmt.unr8 ], [ undef, %stmt.preheader ]
+  %scevgep6.phi.unr19 = phi i32* [ %scevgep6.inc.unr15, %stmt.unr8 ], [ %scevgep6.gep, %stmt.preheader ]
+  %indvar.unr20 = phi i32 [ 1, %stmt.unr8 ], [ 0, %stmt.preheader ]
+  %scevgep6.inc.unr27 = getelementptr i32* %scevgep6.phi.unr19, i32 1
+  br label %stmt.unr30
+stmt.unr30:                                ; preds = %stmt.preheader, %stmt.unr16
+  %imax.03.reg2mem.0.unr31 = phi i32 [ 1, %stmt.unr16 ], [ 0, %stmt.preheader ]
+  %selv.lcssa.reg2mem.1.unr32 = phi i32 [ 0, %stmt.unr16 ], [ undef, %stmt.preheader ]
+  %scevgep6.phi.unr33 = phi i32* [ %scevgep6.inc.unr27, %stmt.unr16 ], [ %scevgep6.gep, %stmt.preheader ]
+  %indvar.unr34 = phi i32 [ 0, %stmt.unr16 ], [ 1, %stmt.preheader ]
+  %_p_scalar_.unr36 = load i32* %scevgep6.phi.unr33, align 4
+  %p_.unr37 = icmp sgt i32 %_p_scalar_.unr36, %imax.03.reg2mem.0.unr31
+  %scevgep6.inc.unr41 = getelementptr i32* %scevgep6.phi.unr33, i32 1
+  %4 = icmp ugt i32 %3, 4
+  br i1 %4, label %stmt., label %loop_exit
+loop_exit:                                  ; preds = %stmt.unr30, %stmt., %.split
+  %itemp.0.lcssa.reg2mem.0 = phi i32 [ undef, %.split ], [ 1, %stmt.unr30 ], [0, %stmt. ]
+  ret i32 %itemp.0.lcssa.reg2mem.0
+stmt.:                                      ; preds = %stmt.preheader, %stmt.unr30, %stmt.
+  %imax.03.reg2mem.0 = phi i32 [ %p_selv2.3, %stmt. ], [ 1, %stmt.unr30 ], [ 0, %stmt.preheader ]
+  %selv.lcssa.reg2mem.1 = phi i32 [ 0, %stmt. ], [ 1, %stmt.unr30 ], [ undef, %stmt.preheader ]
+  %scevgep6.phi = phi i32* [ %scevgep6.inc.3, %stmt. ], [ %scevgep6.inc.unr41, %stmt.unr30 ], [ %scevgep6.gep, %stmt.preheader ]
+  %indvar = phi i32 [ %scevgep.sum.3, %stmt. ], [ 1, %stmt.unr30 ], [ 0, %stmt.preheader ]
+  %scevgep.sum = add i32 %indvar, 1
+  %_p_scalar_ = load i32* %scevgep6.phi, align 4
+  %p_ = icmp sgt i32 %_p_scalar_, %imax.03.reg2mem.0
+  %p_selv = select i1 %p_, i32 %scevgep.sum, i32 %selv.lcssa.reg2mem.1
+  %scevgep.sum.3 = add i32 %indvar, 4
+  %p_selv2.3 = select i1 %p_, i32 %_p_scalar_, i32 %p_selv
+  %scevgep6.inc.3 = getelementptr i32* %scevgep6.phi, i32 4
+  %loop_cond.4 = icmp slt i32 %scevgep.sum.3, %adjust_ub
+  br i1 %loop_cond.4, label %stmt., label %loop_exit

More information about the llvm-commits mailing list