[llvm] a0841df - [BPF] Fix a bug in peephole optimization

Wed Nov 20 15:20:21 PST 2019

Author: Yonghong Song
Date: 2019-11-20T15:19:59-08:00
New Revision: a0841dfe8594f189d79f3612fec019eda4824474

URL: https://github.com/llvm/llvm-project/commit/a0841dfe8594f189d79f3612fec019eda4824474
DIFF: https://github.com/llvm/llvm-project/commit/a0841dfe8594f189d79f3612fec019eda4824474.diff

LOG: [BPF] Fix a bug in peephole optimization

One of the current peephole optimizations is to remove SLL/SRL if
the sub register has been zero extended. This phase has two bugs
and one limitation.

First, for the physical subregister used in pseudo insn COPY
like below, it permits incorrect optimization.
    %0:gpr32 = COPY $w0
    ...
    %4:gpr = MOV_32_64 %0:gpr32
    %5:gpr = SLL_ri %4:gpr(tied-def 0), 32
    %6:gpr = SRA_ri %5:gpr(tied-def 0), 32
The $w0 could be from the return value of a previous function call
and its upper 32-bit value might contain some non-zero values.
The same applies to function arguments.

Second, the current code may permit removing SLL/SRA like below:
    %0:gpr32 = COPY $w0
    %1:gpr32 = COPY %0:gpr32
    ...
    %4:gpr = MOV_32_64 %1:gpr32
    %5:gpr = SLL_ri %4:gpr(tied-def 0), 32
    %6:gpr = SRA_ri %5:gpr(tied-def 0), 32
The reason is that it did not follow def-use chain to skip all
intermediate 32bit-to-32bit COPY instructions.

The current implementation is also very conservative for PHI
instructions. If any PHI insn component is another PHI or COPY insn,
it will simply keep the SLL/SRA, i.e. the optimization is skipped.

This patch fixes these issues as follows:
 - During def/use chain traversal, if any physical register is read,
   SLL/SRA will be preserved as these physical registers are mostly
   from function return values or current function arguments.
 - Recursively visit all COPY and PHI instructions.

Added: 
    llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll
    llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll

Modified: 
    llvm/lib/Target/BPF/BPFMIPeephole.cpp
    llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
    llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index e9eecc55c3c3..c0f99fa2b449 100644

--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -51,6 +51,9 @@ struct BPFMIPeephole : public MachineFunctionPass {
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
 
+  bool isCopyFrom32Def(MachineInstr *CopyMI);
+  bool isInsnFrom32Def(MachineInstr *DefInsn);
+  bool isPhiFrom32Def(MachineInstr *MovMI);
   bool isMovFrom32Def(MachineInstr *MovMI);
   bool eliminateZExtSeq(void);
 
@@ -75,42 +78,77 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) {
   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
 }
 
-bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
 {
-  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+  MachineOperand &opnd = CopyMI->getOperand(1);
 
-  LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
-  LLVM_DEBUG(DefInsn->dump());
+  if (!opnd.isReg())
+    return false;
 
-  if (!DefInsn)
+  // Return false if getting value from a 32bit physical register.
+  // Most likely, this physical register is aliased to
+  // function call return value or current function parameters.
+  Register Reg = opnd.getReg();
+  if (!Register::isVirtualRegister(Reg))
     return false;
 
-  if (DefInsn->isPHI()) {
-    for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) {
-      MachineOperand &opnd = DefInsn->getOperand(i);
+  if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
+    return false;
 
-      if (!opnd.isReg())
-        return false;
+  MachineInstr *DefInsn = MRI->getVRegDef(Reg);
+  if (!isInsnFrom32Def(DefInsn))
+    return false;
 
-      MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
-      // quick check on PHI incoming definitions.
-      if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY)
-        return false;
-    }
-  }
+  return true;
+}
 
-  if (DefInsn->getOpcode() == BPF::COPY) {
-    MachineOperand &opnd = DefInsn->getOperand(1);
+bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
+{
+  for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
+    MachineOperand &opnd = PhiMI->getOperand(i);
 
     if (!opnd.isReg())
       return false;
 
-    Register Reg = opnd.getReg();
-    if ((Register::isVirtualRegister(Reg) &&
-         MRI->getRegClass(Reg) == &BPF::GPRRegClass))
+    MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
+    if (!PhiDef)
+      return false;
+    if (PhiDef->isPHI() && !isPhiFrom32Def(PhiDef))
+      return false;
+    if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
+      return false;
+  }
+
+  return true;
+}
+
+// The \p DefInsn instruction defines a virtual register.
+bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
+{
+  if (!DefInsn)
+    return false;
+
+  if (DefInsn->isPHI()) {
+    if (!isPhiFrom32Def(DefInsn))
+      return false;
+  } else if (DefInsn->getOpcode() == BPF::COPY) {
+    if (!isCopyFrom32Def(DefInsn))
       return false;
   }
 
+  return true;
+}
+
+bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+{
+  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+
+  LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
+  LLVM_DEBUG(DefInsn->dump());
+
+  if (!isInsnFrom32Def(DefInsn))
+    return false;
+
   LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
 
   return true;

diff  --git a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
index a48141cf0404..160be56c30a3 100644
--- a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
@@ -55,6 +55,9 @@ entry:
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
+; CHECK-LABEL: select_cc_32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -63,6 +66,9 @@ entry:
   %c.d = select i1 %cmp, i64 %c, i64 %d
   ret i64 %c.d
 }
+; CHECK-LABEL: select_cc_32_64
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i32 @select_cc_64_32(i64 %a, i64 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -71,6 +77,8 @@ entry:
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
+; CHECK-LABEL: select_cc_64_32
+; CHECK-NOT: r{{[0-9]+}} <<= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i32 @selecti_cc_32(i32 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -79,6 +87,9 @@ entry:
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
+; CHECK-LABEL: selecti_cc_32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -87,6 +98,9 @@ entry:
   %c.d = select i1 %cmp, i64 %c, i64 %d
   ret i64 %c.d
 }
+; CHECK-LABEL: selecti_cc_32_64
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i32 @selecti_cc_64_32(i64 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -95,6 +109,5 @@ entry:
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
-; There shouldn't be any type promotion, all of them are expected to be
-; eliminated by peephole optimization.
+; CHECK-LABEL: selecti_cc_64_32
 ; CHECK-NOT: r{{[0-9]+}} <<= 32

diff  --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll
new file mode 100644
index 000000000000..5a72f59593c6
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
+;
+; For the below test case, 'b' in 'ret == b' needs SLL/SLR.
+; 'ret' in 'ret == b' does not need SLL/SLR as all 'ret' values
+; are assigned through 'w<reg> = <value>' alu32 operations.
+;
+; extern int helper(int);
+; int test(int a, int b, int c, int d) {
+;   int ret;
+;   if (a < b)
+;     ret = (c < d) ? -1 : 0;
+;   else
+;     ret = (c < a) ? 1 : 2;
+;   return helper(ret == b);
+; }
+
+define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %cmp1 = icmp slt i32 %c, %d
+  %cond = sext i1 %cmp1 to i32
+  %cmp2 = icmp slt i32 %c, %a
+  %cond3 = select i1 %cmp2, i32 1, i32 2
+  %ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
+  %cmp4 = icmp eq i32 %ret.0, %b
+  %conv = zext i1 %cmp4 to i32
+  %call = tail call i32 @helper(i32 %conv)
+  ret i32 %call
+}
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
+
+declare dso_local i32 @helper(i32) local_unnamed_addr

diff  --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll
new file mode 100644
index 000000000000..46a1b231c1f0
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
+;
+; For the below test case, both 'ret' and 'b' at 'ret == b'
+; need SLL/SLR. For 'ret', 'ret = a' may receive the value
+; from argument with high 32-bit invalid data.
+;
+; extern int helper(int);
+; int test(int a, int b, int c, int d) {
+;   int ret;
+;   if (a < b)
+;     ret = (c < d) ? a : 0;
+;   else
+;     ret = (c < a) ? 1 : 2;
+;   return helper(ret == b);
+; }
+
+define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %cmp1 = icmp slt i32 %c, %d
+  %cond = select i1 %cmp1, i32 %a, i32 0
+  %cmp2 = icmp slt i32 %c, %a
+  %cond3 = select i1 %cmp2, i32 1, i32 2
+  %ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
+  %cmp4 = icmp eq i32 %ret.0, %b
+  %conv = zext i1 %cmp4 to i32
+  %call = tail call i32 @helper(i32 %conv)
+  ret i32 %call
+}
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
+
+declare dso_local i32 @helper(i32) local_unnamed_addr

diff  --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
index cf68ec56ca3d..63a7c25ed33b 100644
--- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
@@ -47,8 +47,8 @@ define dso_local i64 @select_u(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_add
 entry:
   %cmp = icmp ugt i32 %a, %b
   %c.d = select i1 %cmp, i64 %c, i64 %d
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 ; CHECK: if r{{[0-9]+}} {{<|>}} r{{[0-9]+}} goto
   ret i64 %c.d
 }
@@ -58,8 +58,8 @@ define dso_local i64 @select_u_2(i32 %a, i64 %b, i64 %c, i64 %d) local_unnamed_a
 ; CHECK-LABEL: select_u_2:
 entry:
   %conv = zext i32 %a to i64
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
   %cmp = icmp ugt i64 %conv, %b
   %c.d = select i1 %cmp, i64 %c, i64 %d
   ret i64 %c.d
@@ -100,8 +100,23 @@ define dso_local i32* @inc_p(i32* readnone %p, i32 %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: inc_p:
 entry:
   %idx.ext = zext i32 %a to i64
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 %idx.ext
   ret i32* %add.ptr
 }
+
+define dso_local i32 @test() local_unnamed_addr {
+; CHECK-LABEL: test:
+entry:
+  %call = tail call i32 bitcast (i32 (...)* @helper to i32 ()*)()
+  %cmp = icmp sgt i32 %call, 6
+; The shifts can't be optimized out because %call comes from function call
+; return i32 so the high bits might be invalid.
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} s>>= 32
+  %cond = zext i1 %cmp to i32
+; CHECK: if r{{[0-9]+}} s{{<|>}} {{[0-9]+}} goto
+  ret i32 %cond
+}
+declare dso_local i32 @helper(...) local_unnamed_addr