[llvm] r364336 - [Peephole] Allow folding loads into instructions w/multiple uses (such as test64rr)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 25 10:29:18 PDT 2019


Author: reames
Date: Tue Jun 25 10:29:18 2019
New Revision: 364336

URL: http://llvm.org/viewvc/llvm-project?rev=364336&view=rev
Log:
[Peephole] Allow folding loads into instructions w/multiple uses (such as test64rr)

Peephole opt has a one-use limitation which appears to be accidental. The function being used was incorrectly documented as returning whether the def had one *user*, but it actually returned true only when there was exactly one *use*. Add a corresponding hasOneNonDBGUser helper, and adjust peephole-opt to use the appropriate one.

All of the actual folding code handles multiple uses within a single instruction. That codepath is well exercised through instruction selection.
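To illustrate the distinction (a minimal sketch, not part of the patch): for a load whose only consumer is TEST64rr %1, %1, the defined register has two non-debug uses (two reading operands) but a single non-debug user (the TEST instruction). Assuming MRI points at the function's MachineRegisterInfo and Reg is the vreg defined by the load:

  // Hypothetical fragment showing why the two queries differ for
  // a load feeding TEST64rr %1, %1.
  bool OneUse  = MRI->hasOneNonDBGUse(Reg);   // false: Reg is read by two operands
  bool OneUser = MRI->hasOneNonDBGUser(Reg);  // true: only the TEST reads Reg

With the user-based check, such loads become fold candidates, and the existing folding code already handles the repeated operand.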

Differential Revision: https://reviews.llvm.org/D63656


Added:
    llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir
Modified:
    llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h
    llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp
    llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
    llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll
    llvm/trunk/test/CodeGen/X86/or-branch.ll
    llvm/trunk/test/CodeGen/X86/sibcall.ll
    llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll

Modified: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h?rev=364336&r1=364335&r2=364336&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h Tue Jun 25 10:29:18 2019
@@ -561,9 +561,14 @@ public:
   }
 
   /// hasOneNonDBGUse - Return true if there is exactly one non-Debug
-  /// instruction using the specified register.
+  /// use of the specified register.
   bool hasOneNonDBGUse(unsigned RegNo) const;
 
+  /// hasOneNonDBGUser - Return true if there is exactly one non-Debug
+  /// instruction using the specified register. That instruction may use the
+  /// register multiple times.
+  bool hasOneNonDBGUser(unsigned RegNo) const;
+
   /// replaceRegWith - Replace all instances of FromReg with ToReg in the
   /// machine function.  This is like llvm-level X->replaceAllUsesWith(Y),
   /// except that it also changes any definitions of the register as well.

Modified: llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp?rev=364336&r1=364335&r2=364336&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp Tue Jun 25 10:29:18 2019
@@ -423,6 +423,13 @@ bool MachineRegisterInfo::hasOneNonDBGUs
   return ++UI == use_nodbg_end();
 }
 
+bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
+  use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
+  if (UI == use_instr_nodbg_end())
+    return false;
+  return ++UI == use_instr_nodbg_end();
+}
+
 /// clearKillFlags - Iterate over all the uses of the given register and
 /// clear the kill flag from the MachineOperand. This function is used by
 /// optimization passes which extend register lifetimes and need only

Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=364336&r1=364335&r2=364336&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)
+++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Tue Jun 25 10:29:18 2019
@@ -1306,7 +1306,7 @@ bool PeepholeOptimizer::optimizeUncoales
 
 /// Check whether MI is a candidate for folding into a later instruction.
 /// We only fold loads to virtual registers and the virtual register defined
-/// has a single use.
+/// has a single user.
 bool PeepholeOptimizer::isLoadFoldable(
     MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
   if (!MI.canFoldAsLoad() || !MI.mayLoad())
@@ -1316,12 +1316,12 @@ bool PeepholeOptimizer::isLoadFoldable(
     return false;
 
   unsigned Reg = MI.getOperand(0).getReg();
-  // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
+  // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
   // loads. It should be checked when processing uses of the load, since
   // uses can be removed during peephole.
   if (!MI.getOperand(0).getSubReg() &&
       TargetRegisterInfo::isVirtualRegister(Reg) &&
-      MRI->hasOneNonDBGUse(Reg)) {
+      MRI->hasOneNonDBGUser(Reg)) {
     FoldAsLoadDefCandidates.insert(Reg);
     return true;
   }

Modified: llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll?rev=364336&r1=364335&r2=364336&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll Tue Jun 25 10:29:18 2019
@@ -24,8 +24,7 @@
 define void @foo(i1 zeroext, i32) nounwind {
 ; X86-LABEL: foo:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    testb %al, %al
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    je .LBB0_1
 ; X86-NEXT:  # %bb.3:
 ; X86-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/or-branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/or-branch.ll?rev=364336&r1=364335&r2=364336&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/or-branch.ll (original)
+++ llvm/trunk/test/CodeGen/X86/or-branch.ll Tue Jun 25 10:29:18 2019
@@ -8,8 +8,7 @@ define void @foo(i32 %X, i32 %Y, i32 %Z)
 ; JUMP2-NEXT:    cmpl $5, {{[0-9]+}}(%esp)
 ; JUMP2-NEXT:    jl .LBB0_3
 ; JUMP2-NEXT:  # %bb.1: # %entry
-; JUMP2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; JUMP2-NEXT:    testl %eax, %eax
+; JUMP2-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; JUMP2-NEXT:    je .LBB0_3
 ; JUMP2-NEXT:  # %bb.2: # %UnifiedReturnBlock
 ; JUMP2-NEXT:    retl

Added: llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir?rev=364336&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir (added)
+++ llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir Tue Jun 25 10:29:18 2019
@@ -0,0 +1,88 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  define i32 @atomic(i8** %arg) {
+    %load = load atomic i8*, i8** %arg unordered, align 8
+    %cmp = icmp eq i8* %load, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+  define i32 @nonatomic_unoptimized(i8** %arg) {
+    %load = load i8*, i8** %arg, align 8
+    %cmp = icmp eq i8* %load, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+...
+---
+name:            atomic
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr64 }
+  - { id: 2, class: gr8 }
+  - { id: 3, class: gr32 }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $rdi
+
+    ; CHECK-LABEL: name: atomic
+    ; CHECK: liveins: $rdi
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+    ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load unordered 8 from %ir.arg)
+    ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK: RET 0, $eax
+    %0:gr64 = COPY $rdi
+    %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.arg)
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr8 = SETCCr 4, implicit $eflags
+    %3:gr32 = MOVZX32rr8 killed %2
+    $eax = COPY %3
+    RET 0, $eax
+
+...
+---
+name:            nonatomic_unoptimized
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr64 }
+  - { id: 2, class: gr8 }
+  - { id: 3, class: gr32 }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $rdi
+
+    ; CHECK-LABEL: name: nonatomic_unoptimized
+    ; CHECK: liveins: $rdi
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+    ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 8 from %ir.arg)
+    ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK: RET 0, $eax
+    %0:gr64 = COPY $rdi
+    %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load 8 from %ir.arg)
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr8 = SETCCr 4, implicit $eflags
+    %3:gr32 = MOVZX32rr8 killed %2
+    $eax = COPY %3
+    RET 0, $eax
+
+...

Modified: llvm/trunk/test/CodeGen/X86/sibcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sibcall.ll?rev=364336&r1=364335&r2=364336&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sibcall.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sibcall.ll Tue Jun 25 10:29:18 2019
@@ -264,8 +264,7 @@ declare i32 @foo4()
 define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
 ; X86-LABEL: t11:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    je .LBB11_1
 ; X86-NEXT:  # %bb.2: # %bb
 ; X86-NEXT:    jmp foo5 # TAILCALL
@@ -311,8 +310,7 @@ declare i32 @foo5(i32, i32, i32, i32, i3
 define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
 ; X86-LABEL: t12:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    je .LBB12_1
 ; X86-NEXT:  # %bb.2: # %bb
 ; X86-NEXT:    jmp foo6 # TAILCALL

Modified: llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll?rev=364336&r1=364335&r2=364336&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll Tue Jun 25 10:29:18 2019
@@ -1386,8 +1386,7 @@ define i32 @irreducibleCFG() #4 {
 ; ENABLE-NEXT:    jmp LBB16_1
 ; ENABLE-NEXT:  LBB16_2: ## %split
 ; ENABLE-NEXT:    movq _irreducibleCFGb@{{.*}}(%rip), %rax
-; ENABLE-NEXT:    movl (%rax), %eax
-; ENABLE-NEXT:    testl %eax, %eax
+; ENABLE-NEXT:    cmpl $0, (%rax)
 ; ENABLE-NEXT:    je LBB16_3
 ; ENABLE-NEXT:  ## %bb.4: ## %for.body4.i
 ; ENABLE-NEXT:    movq _irreducibleCFGa@{{.*}}(%rip), %rax
@@ -1430,8 +1429,7 @@ define i32 @irreducibleCFG() #4 {
 ; DISABLE-NEXT:    jmp LBB16_1
 ; DISABLE-NEXT:  LBB16_2: ## %split
 ; DISABLE-NEXT:    movq _irreducibleCFGb@{{.*}}(%rip), %rax
-; DISABLE-NEXT:    movl (%rax), %eax
-; DISABLE-NEXT:    testl %eax, %eax
+; DISABLE-NEXT:    cmpl $0, (%rax)
 ; DISABLE-NEXT:    je LBB16_3
 ; DISABLE-NEXT:  ## %bb.4: ## %for.body4.i
 ; DISABLE-NEXT:    movq _irreducibleCFGa@{{.*}}(%rip), %rax



