[llvm-branch-commits] [llvm-branch] r332940 - Merge r332389 to pick up the fix for PR37431, a regression w/ the new EFLAGS lowering.

Mon May 21 20:06:34 PDT 2018

Author: chandlerc
Date: Mon May 21 20:06:34 2018
New Revision: 332940

URL: http://llvm.org/viewvc/llvm-project?rev=332940&view=rev
Log:
Merge r332389 to pick up the fix for PR37431, a regression w/ the new EFLAGS lowering.

Required switching $regname to %regname in the MIR test and regenerating the
CHECKs for the other test.

Modified:
    llvm/branches/release_60/   (props changed)
    llvm/branches/release_60/lib/Target/X86/X86FlagsCopyLowering.cpp
    llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll
    llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir

Propchange: llvm/branches/release_60/
------------------------------------------------------------------------------

--- svn:mergeinfo (original)
+++ svn:mergeinfo Mon May 21 20:06:34 2018
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,321751,321789,321791,321806,321862,321870,321872,321878,321911,321980,321991,321993-321994,322003,322016,322053,322056,322103,322106,322108,322123,322131,322223,322272,322313,322372,322473,322521,322623,322644,322724,322767,322875,322878-322879,322900,322904-322905,322973,322993,323034,323155,323190,323307,323331,323355,323369,323371,323384,323469,323515,323536,323582,323643,323671-323672,323706,323710,323759,323781,323810-323811,323813,323857,323907-323909,323913,323915,324002,324039,324110,324195,324353,324422,324449,324497,324576,324645,324746,324772,324916,324962,325049,325085,325139,325148,325168,325463,325525,325550,325654,325687,325739,325894,325946,326393,328945,329040,329055-329057,329657,329673,329771,330264,330269
+/llvm/trunk:155241,321751,321789,321791,321806,321862,321870,321872,321878,321911,321980,321991,321993-321994,322003,322016,322053,322056,322103,322106,322108,322123,322131,322223,322272,322313,322372,322473,322521,322623,322644,322724,322767,322875,322878-322879,322900,322904-322905,322973,322993,323034,323155,323190,323307,323331,323355,323369,323371,323384,323469,323515,323536,323582,323643,323671-323672,323706,323710,323759,323781,323810-323811,323813,323857,323907-323909,323913,323915,324002,324039,324110,324195,324353,324422,324449,324497,324576,324645,324746,324772,324916,324962,325049,325085,325139,325148,325168,325463,325525,325550,325654,325687,325739,325894,325946,326393,328945,329040,329055-329057,329657,329673,329771,330264,330269,332389

Modified: llvm/branches/release_60/lib/Target/X86/X86FlagsCopyLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86FlagsCopyLowering.cpp?rev=332940&r1=332939&r2=332940&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86FlagsCopyLowering.cpp (original)
+++ llvm/branches/release_60/lib/Target/X86/X86FlagsCopyLowering.cpp Mon May 21 20:06:34 2018
@@ -127,6 +127,10 @@ private:
                       MachineInstr &JmpI, CondRegArray &CondRegs);
   void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                    MachineInstr &CopyDefI);
+  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
+                               MachineBasicBlock::iterator TestPos,
+                               DebugLoc TestLoc, MachineInstr &SetBI,
+                               MachineOperand &FlagUse, CondRegArray &CondRegs);
   void rewriteSetCC(MachineBasicBlock &TestMBB,
                     MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                     MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -511,8 +515,7 @@ bool X86FlagsCopyLoweringPass::runOnMach
         } else if (MI.getOpcode() == TargetOpcode::COPY) {
           rewriteCopy(MI, *FlagUse, CopyDefI);
         } else {
-          // We assume that arithmetic instructions that use flags also def
-          // them.
+          // We assume all other instructions that use flags also def them.
           assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                  "Expected a def of EFLAGS for this instruction!");
 
@@ -524,7 +527,23 @@ bool X86FlagsCopyLoweringPass::runOnMach
           // logic.
           FlagsKilled = true;
 
-          rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+          switch (MI.getOpcode()) {
+          case X86::SETB_C8r:
+          case X86::SETB_C16r:
+          case X86::SETB_C32r:
+          case X86::SETB_C64r:
+            // Use custom lowering for arithmetic that is merely extending the
+            // carry flag. We model this as the SETB_C* pseudo instructions.
+            rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+                                    CondRegs);
+            break;
+
+          default:
+            // Generically handle remaining uses as arithmetic instructions.
+            rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+                              CondRegs);
+            break;
+          }
           break;
         }
 
@@ -756,6 +775,126 @@ void X86FlagsCopyLoweringPass::rewriteCo
   MI.eraseFromParent();
 }
 
+void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
+    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+    DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
+    CondRegArray &CondRegs) {
+  // This routine is only used to handle pseudos for setting a register to zero
+  // or all ones based on CF. This is essentially the sign extended from 1-bit
+  // form of SETB and modeled with the SETB_C* pseudos. They require special
+  // handling as they aren't normal SETcc instructions and are lowered to an
+  // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
+  // they are only provided in reg-defining forms. A complicating factor is that
+  // they can define many different register widths.
+  assert(SetBI.getOperand(0).isReg() &&
+         "Cannot have a non-register defined operand to this variant of SETB!");
+
+  // Little helper to do the common final step of replacing the register def'ed
+  // by this SETB instruction with a new register and removing the SETB
+  // instruction.
+  auto RewriteToReg = [&](unsigned Reg) {
+    MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
+    SetBI.eraseFromParent();
+  };
+
+  // Grab the register class used for this particular instruction.
+  auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
+
+  MachineBasicBlock &MBB = *SetBI.getParent();
+  auto SetPos = SetBI.getIterator();
+  auto SetLoc = SetBI.getDebugLoc();
+
+  auto AdjustReg = [&](unsigned Reg) {
+    auto &OrigRC = *MRI->getRegClass(Reg);
+    if (&OrigRC == &SetBRC)
+      return Reg;
+
+    unsigned NewReg;
+
+    int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
+    int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
+    assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
+    assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
+    int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
+                       X86::NoSubRegister, X86::sub_32bit};
+
+    // If the original size is smaller than the target *and* is smaller than 4
+    // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
+    // to maximize the chance of being able to CSE that operation and to avoid
+    // partial dependency stalls extending to 2-bytes.
+    if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
+      NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+      BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
+          .addReg(Reg);
+      if (&SetBRC == &X86::GR32RegClass)
+        return NewReg;
+      Reg = NewReg;
+      OrigRegSize = 4;
+    }
+
+    NewReg = MRI->createVirtualRegister(&SetBRC);
+    if (OrigRegSize < TargetRegSize) {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
+              NewReg)
+          .addImm(0)
+          .addReg(Reg)
+          .addImm(SubRegIdx[OrigRegSize]);
+    } else if (OrigRegSize > TargetRegSize) {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
+              NewReg)
+          .addReg(Reg)
+          .addImm(SubRegIdx[TargetRegSize]);
+    } else {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
+          .addReg(Reg);
+    }
+    return NewReg;
+  };
+
+  unsigned &CondReg = CondRegs[X86::COND_B];
+  if (!CondReg)
+    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
+
+  // Adjust the condition to have the desired register width by zero-extending
+  // as needed.
+  // FIXME: We should use a better API to avoid the local reference and using a
+  // different variable here.
+  unsigned ExtCondReg = AdjustReg(CondReg);
+
+  // Now we need to turn this into a bitmask. We do this by subtracting it from
+  // zero.
+  unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+  BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
+  ZeroReg = AdjustReg(ZeroReg);
+
+  unsigned Sub;
+  switch (SetBI.getOpcode()) {
+  case X86::SETB_C8r:
+    Sub = X86::SUB8rr;
+    break;
+
+  case X86::SETB_C16r:
+    Sub = X86::SUB16rr;
+    break;
+
+  case X86::SETB_C32r:
+    Sub = X86::SUB32rr;
+    break;
+
+  case X86::SETB_C64r:
+    Sub = X86::SUB64rr;
+    break;
+
+  default:
+    llvm_unreachable("Invalid SETB_C* opcode!");
+  }
+  unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
+  BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
+      .addReg(ZeroReg)
+      .addReg(ExtCondReg);
+  return RewriteToReg(ResultReg);
+}
+
 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                             MachineBasicBlock::iterator TestPos,
                                             DebugLoc TestLoc,

Modified: llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll?rev=332940&r1=332939&r2=332940&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll Mon May 21 20:06:34 2018
@@ -304,3 +304,61 @@ bb1:
   %tmp12 = trunc i32 %tmp11 to i16
   br label %bb1
 }
+
+; Use a particular instruction pattern in order to lower to the post-RA pseudo
+; used to lower SETB into an SBB pattern in order to make sure that kind of
+; usage of a copied EFLAGS continues to work.
+define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
+; X32-LABEL: PR37431:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %esi, -8
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    sarl $31, %ecx
+; X32-NEXT:    cmpl %eax, %eax
+; X32-NEXT:    sbbl %ecx, %eax
+; X32-NEXT:    setb %al
+; X32-NEXT:    sbbb %cl, %cl
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb %cl, (%edx)
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    subl %eax, %ecx
+; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    idivl %ecx
+; X32-NEXT:    movb %dl, (%esi)
+; X32-NEXT:    popl %esi
+; X32-NEXT:    retl
+;
+; X64-LABEL: PR37431:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movslq (%rdi), %rax
+; X64-NEXT:    cmpq %rax, %rax
+; X64-NEXT:    sbbb %dl, %dl
+; X64-NEXT:    cmpq %rax, %rax
+; X64-NEXT:    movb %dl, (%rsi)
+; X64-NEXT:    sbbl %esi, %esi
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    idivl %esi
+; X64-NEXT:    movb %dl, (%rcx)
+; X64-NEXT:    retq
+entry:
+  %tmp = load i32, i32* %arg1
+  %tmp1 = sext i32 %tmp to i64
+  %tmp2 = icmp ugt i64 %tmp1, undef
+  %tmp3 = zext i1 %tmp2 to i8
+  %tmp4 = sub i8 0, %tmp3
+  store i8 %tmp4, i8* %arg2
+  %tmp5 = sext i8 %tmp4 to i32
+  %tmp6 = srem i32 0, %tmp5
+  %tmp7 = trunc i32 %tmp6 to i8
+  store i8 %tmp7, i8* %arg3
+  ret void
+}

Modified: llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir?rev=332940&r1=332939&r2=332940&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir (original)
+++ llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir Mon May 21 20:06:34 2018
@@ -66,6 +66,12 @@
     call void @foo()
     ret void
   }
+
+  define void @test_setb_c(i64 %a, i64 %b) {
+  entry:
+    call void @foo()
+    ret void
+  }
 ...
 ---
 name:            test_branch
@@ -481,4 +487,69 @@ body:             |
 
     RET 0
 
+...
+---
+name:            test_setb_c
+# CHECK-LABEL: name: test_setb_c
+liveins:
+  - { reg: '%rdi', virtual-reg: '%0' }
+  - { reg: '%rsi', virtual-reg: '%1' }
+body:             |
+  bb.0:
+    liveins: %rdi, %rsi
+
+    %0:gr64 = COPY %rdi
+    %1:gr64 = COPY %rsi
+    %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+    %3:gr64 = COPY %eflags
+  ; CHECK-NOT:  COPY{{( killed)?}} %eflags
+  ; CHECK:      %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+  ; CHECK-NOT:  COPY{{( killed)?}} %eflags
+
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+    CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+    %eflags = COPY %3
+    %4:gr8 = SETB_C8r implicit-def %eflags, implicit %eflags
+    MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4
+  ; CHECK-NOT:     %eflags =
+  ; CHECK:         %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
+  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
+  ; CHECK-NEXT:    MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+    %eflags = COPY %3
+    %5:gr16 = SETB_C16r implicit-def %eflags, implicit %eflags
+    MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %5
+  ; CHECK-NOT:     %eflags =
+  ; CHECK:         %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+  ; CHECK-NEXT:    %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
+  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
+  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
+  ; CHECK-NEXT:    MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+    %eflags = COPY %3
+    %6:gr32 = SETB_C32r implicit-def %eflags, implicit %eflags
+    MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %6
+  ; CHECK-NOT:     %eflags =
+  ; CHECK:         %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
+  ; CHECK-NEXT:    MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+    %eflags = COPY %3
+    %7:gr64 = SETB_C64r implicit-def %eflags, implicit %eflags
+    MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %7
+  ; CHECK-NOT:     %eflags =
+  ; CHECK:         %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+  ; CHECK-NEXT:    %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
+  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+  ; CHECK-NEXT:    %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
+  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
+  ; CHECK-NEXT:    MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+    RET 0
+
 ...