[llvm] 241cbf2 - [PowerPC] Fix crash in peephole optimization

Mon Dec 2 06:56:23 PST 2019

Author: Nemanja Ivanovic
Date: 2019-12-02T08:56:04-06:00
New Revision: 241cbf201a6f4b7658697e3c76fc6e741d049a01

URL: https://github.com/llvm/llvm-project/commit/241cbf201a6f4b7658697e3c76fc6e741d049a01
DIFF: https://github.com/llvm/llvm-project/commit/241cbf201a6f4b7658697e3c76fc6e741d049a01.diff

LOG: [PowerPC] Fix crash in peephole optimization

When converting reg+reg shifts to reg+imm rotates, we neglect to consider the
CodeGenOnly versions of the 32-bit shift mnemonics. As a result, we produce a
rotate with missing operands, which causes a crash.

Committing this fix without review since it is non-controversial that the list
of mnemonics to consider should include the 64-bit aliases of those same
mnemonics.

Fixes PR44183.

Added: 
    llvm/test/CodeGen/PowerPC/pr44183.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index e94ef4b1e505..f5e2b473f1ee 100644

--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3757,8 +3757,10 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
     ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
 
   unsigned Opc = MI.getOpcode();
-  bool SpecialShift32 =
-    Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
+  bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLWo ||
+                        Opc == PPC::SRW || Opc == PPC::SRWo ||
+                        Opc == PPC::SLW8 || Opc == PPC::SLW8o ||
+                        Opc == PPC::SRW8 || Opc == PPC::SRW8o;
   bool SpecialShift64 =
     Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo;
   bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo ||

diff  --git a/llvm/test/CodeGen/PowerPC/pr44183.ll b/llvm/test/CodeGen/PowerPC/pr44183.ll
new file mode 100644
index 000000000000..1a6f932bc6d0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr44183.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s
+%struct.m.2.5.8.11 = type { %struct.l.0.3.6.9, [7 x i8], %struct.a.1.4.7.10 }
+%struct.l.0.3.6.9 = type { i8 }
+%struct.a.1.4.7.10 = type { [27 x i8], [0 x i32], [4 x i8] }
+define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind align 2 {
+; CHECK-LABEL: _ZN1m1nEv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    ld r4, 8(r30)
+; CHECK-NEXT:    lwz r5, 36(r30)
+; CHECK-NEXT:    rldicl r4, r4, 60, 4
+; CHECK-NEXT:    rlwinm r3, r4, 31, 0, 0
+; CHECK-NEXT:    rlwinm r4, r5, 0, 31, 31
+; CHECK-NEXT:    or r4, r4, r3
+; CHECK-NEXT:    bl _ZN1llsE1d
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ld r3, 16(r30)
+; CHECK-NEXT:    ld r4, 8(r30)
+; CHECK-NEXT:    rldicl r4, r4, 60, 4
+; CHECK-NEXT:    sldi r3, r3, 60
+; CHECK-NEXT:    or r3, r4, r3
+; CHECK-NEXT:    sldi r3, r3, 31
+; CHECK-NEXT:    clrldi r4, r3, 32
+; CHECK-NEXT:    bl _ZN1llsE1d
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    blr
+entry:
+  %bc = getelementptr inbounds %struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2
+  %0 = bitcast %struct.a.1.4.7.10* %bc to i216*
+  %bf.load = load i216, i216* %0, align 8
+  %bf.lshr = lshr i216 %bf.load, 4
+  %shl.i23 = shl i216 %bf.lshr, 31
+  %shl.i = trunc i216 %shl.i23 to i32
+  %arrayidx = getelementptr inbounds %struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2, i32 1, i64 0
+  %1 = load i32, i32* %arrayidx, align 4
+  %and.i = and i32 %1, 1
+  %or.i = or i32 %and.i, %shl.i
+  tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %or.i) #1
+  %bf.load10 = load i216, i216* %0, align 8
+  %bf.lshr11 = lshr i216 %bf.load10, 4
+  %shl.i1524 = shl i216 %bf.lshr11, 31
+  %shl.i15 = trunc i216 %shl.i1524 to i32
+  tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %shl.i15) #1
+  ret void
+}
+declare void @_ZN1llsE1d(%struct.l.0.3.6.9*, i32) local_unnamed_addr #0