[llvm] r326774 - [AVR] Remove the earlyclobber flag from LDDWRdYQ

Tue Mar 6 03:20:25 PST 2018

Author: dylanmckay
Date: Tue Mar  6 03:20:25 2018
New Revision: 326774

URL: http://llvm.org/viewvc/llvm-project?rev=326774&view=rev
Log:
[AVR] Remove the earlyclobber flag from LDDWRdYQ

Before I started maintaining the AVR backend, this instruction
never originally used to have an earlyclobber flag.

Some time afterwards (years ago), I must've added it back in, not realising that it
was left out for a reason.

This pseudo instrction exists solely to work around a long standing bug
in the register allocator.

Before this commit, the LDDWRdYQ pseudo was not actually working around
any bug. With the earlyclobber flag removed again, the LDDWRdYQ pseudo
now correctly works around PR13375 again.

Added:
    llvm/trunk/test/CodeGen/AVR/issue-regalloc-stackframe-folding-earlyclobber.ll
Modified:
    llvm/trunk/lib/Target/AVR/AVRInstrInfo.td

Modified: llvm/trunk/lib/Target/AVR/AVRInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AVR/AVRInstrInfo.td?rev=326774&r1=326773&r2=326774&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AVR/AVRInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AVR/AVRInstrInfo.td Tue Mar  6 03:20:25 2018
@@ -1228,9 +1228,23 @@ isReMaterializable = 1 in
                           [(set i16:$dst, (load addr:$memri))]>,
                    Requires<[HasSRAM]>;
 
+  // An identical pseudo instruction to LDDWRdPtrQ, expect restricted to the Y
+  // register and without the @earlyclobber flag.
+  //
+  // Used to work around a bug caused by the register allocator not
+  // being able to handle the expansion of a COPY into an machine instruction
+  // that has an earlyclobber flag. This is because the register allocator will
+  // try expand a copy from a register slot into an earlyclobber instruction.
+  // Instructions that are earlyclobber need to be in a dedicated earlyclobber slot.
+  //
+  // This pseudo instruction can be used pre-AVR pseudo expansion in order to
+  // get a frame index load without directly using earlyclobber instructions.
+  //
+  // The pseudo expansion pass trivially expands this into LDDWRdPtrQ.
+  //
+  // This instruction may be removed once PR13375 is fixed.
   let mayLoad = 1,
-  hasSideEffects = 0,
-  Constraints = "@earlyclobber $dst" in
+  hasSideEffects = 0 in
   def LDDWRdYQ : Pseudo<(outs DREGS:$dst),
                         (ins memri:$memri),
                         "lddw\t$dst, $memri",

Added: llvm/trunk/test/CodeGen/AVR/issue-regalloc-stackframe-folding-earlyclobber.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AVR/issue-regalloc-stackframe-folding-earlyclobber.ll?rev=326774&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AVR/issue-regalloc-stackframe-folding-earlyclobber.ll (added)
+++ llvm/trunk/test/CodeGen/AVR/issue-regalloc-stackframe-folding-earlyclobber.ll Tue Mar  6 03:20:25 2018
@@ -0,0 +1,139 @@
+; RUN: llc < %s -march=avr -mcpu=atmega328 | FileCheck %s
+
+; This test case is designed to trigger a bug caused by the register
+; allocator not handling the case where a target generates a load/store with
+; a frame index that happens to be an earlyclobber instruction.
+;
+; It is taken from the translated LLVM IR of the Rust core library.
+;
+; The error message looked like
+; Assertion failed: (MI && "No instruction defining live value"), function computeDeadValues
+;
+; See PR13375 for details.
+
+%values = type { i16, [2 x i8], [40 x i32], [0 x i8] }
+
+ at POW10TO128 = external constant [14 x i32], align 4
+
+; CHECK: core_num_flt2dec_strategy_dragon
+define nonnull dereferenceable(164) %values* @core_num_flt2dec_strategy_dragon(%values* returned dereferenceable(164) %arg, i16 %arg1) unnamed_addr {
+start:
+  %ret.i = alloca [40 x i32], align 4
+  %tmp = icmp eq i16 undef, 0
+  br i1 %tmp, label %bb5, label %bb1
+
+bb1:
+  unreachable
+
+bb5:
+  br i1 undef, label %bb14, label %bb11
+
+bb11:
+  unreachable
+
+bb14:
+  %tmp2 = bitcast [40 x i32]* %ret.i to i8*
+  call void @llvm.memset.p0i8.i16(i8* align 4 %tmp2, i8 0, i16 160, i1 false)
+  %tmp3 = getelementptr inbounds %values, %values* %arg, i16 0, i32 0
+  %tmp4 = load i16, i16* %tmp3, align 2
+  %tmp5 = icmp ult i16 %tmp4, 14
+  %tmp6 = getelementptr inbounds %values, %values* %arg, i16 0, i32 2, i16 0
+  br i1 %tmp5, label %bb2.i38, label %bb3.i39
+
+bb2.i38:
+  %tmp7 = getelementptr inbounds %values, %values* %arg, i16 0, i32 2, i16 %tmp4
+  %tmp8 = ptrtoint i32* %tmp6 to i16
+  br label %bb4.outer.i122
+
+bb4.outer.i122:
+  %iter.sroa.0.0.ph.i119 = phi i16 [ %tmp12, %bb24.i141 ], [ %tmp8, %bb2.i38 ]
+  %retsz.0.ph.i121 = phi i16 [ %.retsz.0.i140, %bb24.i141 ], [ 0, %bb2.i38 ]
+  br label %bb4.i125
+
+bb4.i125:
+  %iter.sroa.0.0.i123 = phi i16 [ %tmp12, %core.iter.Enumerate.ALPHA ], [ %iter.sroa.0.0.ph.i119, %bb4.outer.i122 ]
+  %tmp9 = inttoptr i16 %iter.sroa.0.0.i123 to i32*
+  %tmp10 = icmp eq i32* %tmp9, %tmp7
+  br i1 %tmp10, label %core.num.bignum.Big32x40.exit44, label %core.iter.Enumerate.ALPHA
+
+core.iter.Enumerate.ALPHA:
+  %tmp11 = getelementptr inbounds i32, i32* %tmp9, i16 1
+  %tmp12 = ptrtoint i32* %tmp11 to i16
+  %tmp13 = load i32, i32* %tmp9, align 4
+  %tmp14 = icmp eq i32 %tmp13, 0
+  br i1 %tmp14, label %bb4.i125, label %core..iter..Enumerate.exit17
+
+core..iter..Enumerate.exit17:
+  %tmp15 = zext i32 %tmp13 to i64
+  br label %core..iter..Enumerate.exit17.i132
+
+core..iter..Enumerate.exit17.i132:
+  %carry.085.i129 = phi i32 [ 0, %core..iter..Enumerate.exit17 ], [ %tmp28, %core_slice_IndexMut.exit13 ]
+  %tmp16 = icmp ult i16 undef, 40
+  br i1 %tmp16, label %core_slice_IndexMut.exit13, label %panic.i.i14.i134
+
+bb16.i133:
+  %tmp17 = icmp eq i32 %tmp28, 0
+  br i1 %tmp17, label %bb24.i141, label %bb21.i136
+
+panic.i.i14.i134:
+  unreachable
+
+core_slice_IndexMut.exit13:
+  %tmp18 = load i32, i32* null, align 4
+  %tmp19 = getelementptr inbounds [40 x i32], [40 x i32]* %ret.i, i16 0, i16 undef
+  %tmp20 = load i32, i32* %tmp19, align 4
+  %tmp21 = zext i32 %tmp18 to i64
+  %tmp22 = mul nuw i64 %tmp21, %tmp15
+  %tmp23 = zext i32 %tmp20 to i64
+  %tmp24 = zext i32 %carry.085.i129 to i64
+  %tmp25 = add nuw nsw i64 %tmp23, %tmp24
+  %tmp26 = add i64 %tmp25, %tmp22
+  %tmp27 = lshr i64 %tmp26, 32
+  %tmp28 = trunc i64 %tmp27 to i32
+  %tmp29 = icmp eq i32* undef, getelementptr inbounds ([14 x i32], [14 x i32]* @POW10TO128, i16 1, i16 0)
+  br i1 %tmp29, label %bb16.i133, label %core..iter..Enumerate.exit17.i132
+
+bb21.i136:
+  %tmp30 = icmp ult i16 undef, 40
+  br i1 %tmp30, label %"_ZN4core5slice70_$LT$impl$u20$core..ops..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17h8eccc0af1ec6f971E.exit.i138", label %panic.i.i.i137
+
+panic.i.i.i137:
+  unreachable
+
+"_ZN4core5slice70_$LT$impl$u20$core..ops..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17h8eccc0af1ec6f971E.exit.i138":
+  store i32 %tmp28, i32* undef, align 4
+  br label %bb24.i141
+
+bb24.i141:
+  %sz.0.i139 = phi i16 [ 15, %"_ZN4core5slice70_$LT$impl$u20$core..ops..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17h8eccc0af1ec6f971E.exit.i138" ], [ 14, %bb16.i133 ]
+  %tmp31 = add i16 %sz.0.i139, 0
+  %tmp32 = icmp ult i16 %retsz.0.ph.i121, %tmp31
+  %.retsz.0.i140 = select i1 %tmp32, i16 %tmp31, i16 %retsz.0.ph.i121
+  br label %bb4.outer.i122
+
+bb3.i39:
+  %tmp33 = call fastcc i16 @_ZN4core3num6bignum8Big32x4010mul_digits9mul_inner17h5d3461bce04d16ccE([40 x i32]* nonnull dereferenceable(160) %ret.i, i32* noalias nonnull readonly getelementptr inbounds ([14 x i32], [14 x i32]* @POW10TO128, i16 0, i16 0), i16 14, i32* noalias nonnull readonly %tmp6, i16 %tmp4)
+  br label %core.num.bignum.Big32x40.exit44
+
+core.num.bignum.Big32x40.exit44:
+  %retsz.0.i40 = phi i16 [ %tmp33, %bb3.i39 ], [ %retsz.0.ph.i121, %bb4.i125 ]
+  call void @llvm.memcpy.p0i8.p0i8.i16(i8* align 4 undef, i8* align 4 %tmp2, i16 160, i1 false)
+  store i16 %retsz.0.i40, i16* %tmp3, align 2
+  %tmp34 = and i16 %arg1, 256
+  %tmp35 = icmp eq i16 %tmp34, 0
+  br i1 %tmp35, label %bb30, label %bb27
+
+bb27:
+  unreachable
+
+bb30:
+  ret %values* %arg
+}
+
+declare fastcc i16 @_ZN4core3num6bignum8Big32x4010mul_digits9mul_inner17h5d3461bce04d16ccE([40 x i32]* nocapture dereferenceable(160), i32* noalias nonnull readonly, i16, i32* noalias nonnull readonly, i16)
+
+declare void @llvm.memset.p0i8.i16(i8* nocapture writeonly, i8, i16, i1)
+
+declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i1)
+