[llvm] 2d6a5ab - [X86]Recommit D154193 - Remove TEST in AND32ri+TEST16rr in peephole-opt

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 14 00:44:58 PDT 2023


Author: XinWang10
Date: 2023-07-14T03:42:42-04:00
New Revision: 2d6a5ab5ebd4907fe3bb2ee67574fd672ad80bd8

URL: https://github.com/llvm/llvm-project/commit/2d6a5ab5ebd4907fe3bb2ee67574fd672ad80bd8
DIFF: https://github.com/llvm/llvm-project/commit/2d6a5ab5ebd4907fe3bb2ee67574fd672ad80bd8.diff

LOG: [X86]Recommit D154193 - Remove TEST in AND32ri+TEST16rr in peephole-opt

Previously we remove a pattern like:
  %reg = and32ri %in_reg, 5
  ...                         // EFLAGS not changed.
  %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
  test64rr %src_reg, %src_reg, implicit-def $eflags
We can remove test64rr since it has same functionality as and subreg_to_reg avoid the opt in previous code, so we handle this case specially.
And this case is also can be opted for the same reason, like:
  %reg = and32ri %in_reg, 5
  ...                         // EFLAGS not changed.
  %src_reg = copy %reg.sub_16bit:gr32
  test16rr %src_reg, %src_reg, implicit-def $eflags
The COPY from gr32 to gr16 prevent the opt in previous code too, just handle it specially as what we did for test64rr.

Reviewed By: skan

Differential Revision: https://reviews.llvm.org/D154193

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/test/CodeGen/X86/peephole-test-after-add.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 9f9dc539533ffe..0b8bec8fb35fc0 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -971,18 +971,19 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
                                    MachineInstr **AndInstr,
                                    const TargetRegisterInfo *TRI,
                                    bool &NoSignFlag, bool &ClearsOverflowFlag) {
-  if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
+  if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG &&
+        CmpInstr.getOpcode() == X86::TEST64rr) &&
+      !(CmpValDefInstr.getOpcode() == X86::COPY &&
+        CmpInstr.getOpcode() == X86::TEST16rr))
     return false;
 
-  if (CmpInstr.getOpcode() != X86::TEST64rr)
-    return false;
-
-  // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
-  // guarantees that it's analyzable only if two registers are identical.
-  assert(
-      (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
-      "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
-      "requires two reg operands are the same.");
+  // CmpInstr is a TEST16rr/TEST64rr instruction, and
+  // `X86InstrInfo::analyzeCompare` guarantees that it's analyzable only if two
+  // registers are identical.
+  assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+         "CmpInstr is an analyzable TEST16rr/TEST64rr, and "
+         "`X86InstrInfo::analyzeCompare` requires two reg operands are the"
+         "same.");
 
   // Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
   // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
@@ -990,20 +991,37 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
   // redundant.
   assert(
       (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
-      "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");
+      "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
+      "is a user of COPY sub16bit.");
+  MachineInstr *VregDefInstr = nullptr;
+  if (CmpInstr.getOpcode() == X86::TEST16rr) {
+    if (!CmpValDefInstr.getOperand(1).getReg().isVirtual())
+      return false;
+    VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg());
+    if (!VregDefInstr)
+      return false;
+    // We can only remove test when AND32ri or AND64ri32 whose imm can fit 16bit
+    // size, others 32/64 bit ops would test higher bits which test16rr don't
+    // want to.
+    if (!((VregDefInstr->getOpcode() == X86::AND32ri ||
+           VregDefInstr->getOpcode() == X86::AND64ri32) &&
+          isUInt<16>(VregDefInstr->getOperand(2).getImm())))
+      return false;
+  }
 
-  // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically
-  // 0.
-  if (CmpValDefInstr.getOperand(1).getImm() != 0)
-    return false;
+  if (CmpInstr.getOpcode() == X86::TEST64rr) {
+    // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
+    // typically 0.
+    if (CmpValDefInstr.getOperand(1).getImm() != 0)
+      return false;
 
-  // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
-  // sub_32bit or sub_xmm.
-  if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
-    return false;
+    // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
+    // sub_32bit or sub_xmm.
+    if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
+      return false;
 
-  MachineInstr *VregDefInstr =
-      MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
+    VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
+  }
 
   assert(VregDefInstr && "Must have a definition (SSA)");
 
@@ -1021,6 +1039,11 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
     //   ...                                // EFLAGS not changed
     //   %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
     //   test64rr %extended_reg, %extended_reg, implicit-def $eflags
+    // or
+    //   %reg = and32* ...
+    //   ...                         // EFLAGS not changed.
+    //   %src_reg = copy %reg.sub_16bit:gr32
+    //   test16rr %src_reg, %src_reg, implicit-def $eflags
     //
     // If subsequent readers use a subset of bits that don't change
     // after `and*` instructions, it's likely that the test64rr could
@@ -4421,10 +4444,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
           break;
         }
 
-        // Look back for the following pattern, in which case the test64rr
-        // instruction could be erased.
+        // Look back for the following pattern, in which case the
+        // test16rr/test64rr instruction could be erased.
         //
-        // Example:
+        // Example for test16rr:
+        //  %reg = and32ri %in_reg, 5
+        //  ...                         // EFLAGS not changed.
+        //  %src_reg = copy %reg.sub_16bit:gr32
+        //  test16rr %src_reg, %src_reg, implicit-def $eflags
+        // Example for test64rr:
         //  %reg = and32ri %in_reg, 5
         //  ...                         // EFLAGS not changed.
         //  %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index

diff  --git a/llvm/test/CodeGen/X86/peephole-test-after-add.mir b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
index 3c6986a0d1b67b..5023c966d6a847 100644
--- a/llvm/test/CodeGen/X86/peephole-test-after-add.mir
+++ b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
@@ -58,13 +58,29 @@
   entry:
     %3 = icmp ne i16 %0, 0
     %4 = and i32 %1, 123456
-    %truc = trunc i32 %4 to i16
-    %5 = icmp eq i16 %truc, 0
+    %trunc = trunc i32 %4 to i16
+    %5 = icmp eq i16 %trunc, 0
     %6 = select i1 %3, i1 %5, i1 false
     br i1 %6, label %if.then, label %if.end
 
   if.then:                                          ; preds = %entry
-    store i16 %0, ptr %2, align 4
+    store i32 %4, ptr %2, align 4
+    br label %if.end
+
+  if.end:                                           ; preds = %if.then, %entry
+    ret i16 0
+  }
+
+  define i16 @erase_test16_sf(i16 %0, i16 %1, ptr nocapture %2) {
+  entry:
+    %3 = icmp ne i16 %0, 0
+    %4 = and i16 %1, 1234
+    %5 = icmp slt i16 %4, 0
+    %6 = select i1 %3, i1 %5, i1 false
+    br i1 %6, label %if.then, label %if.end
+
+  if.then:                                          ; preds = %entry
+    store i16 %4, ptr %2, align 4
     br label %if.end
 
   if.end:                                           ; preds = %if.then, %entry
@@ -303,9 +319,8 @@ body:             |
   ; CHECK-NEXT: bb.1.entry:
   ; CHECK-NEXT:   successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123, implicit-def $eflags
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
-  ; CHECK-NEXT:   TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
   ; CHECK-NEXT:   JCC_1 %bb.3, 5, implicit $eflags
   ; CHECK-NEXT:   JMP_1 %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -374,16 +389,16 @@ tracksDebugUserValues: false
 registers:
   - { id: 0, class: gr32, preferred-register: '' }
   - { id: 1, class: gr32, preferred-register: '' }
-  - { id: 2, class: gr64, preferred-register: '' }
-  - { id: 3, class: gr16, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr64, preferred-register: '' }
   - { id: 4, class: gr16, preferred-register: '' }
-  - { id: 5, class: gr32, preferred-register: '' }
+  - { id: 5, class: gr16, preferred-register: '' }
   - { id: 6, class: gr32, preferred-register: '' }
   - { id: 7, class: gr16, preferred-register: '' }
 liveins:
-  - { reg: '$edi', virtual-reg: '%0' }
-  - { reg: '$esi', virtual-reg: '%1' }
-  - { reg: '$rdx', virtual-reg: '%2' }
+  - { reg: '$edi', virtual-reg: '%1' }
+  - { reg: '$esi', virtual-reg: '%2' }
+  - { reg: '$rdx', virtual-reg: '%3' }
 frameInfo:
   isFrameAddressTaken: false
   isReturnAddressTaken: false
@@ -429,7 +444,7 @@ body:             |
   ; CHECK-NEXT: bb.1.entry:
   ; CHECK-NEXT:   successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 57920, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123456, implicit-def dead $eflags
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
   ; CHECK-NEXT:   TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
   ; CHECK-NEXT:   JCC_1 %bb.3, 5, implicit $eflags
@@ -438,7 +453,7 @@ body:             |
   ; CHECK-NEXT: bb.2.if.then:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s16) into %ir.2, align 4)
+  ; CHECK-NEXT:   MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[AND32ri]] :: (store (s32) into %ir.2)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.if.end:
   ; CHECK-NEXT:   [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
@@ -449,19 +464,19 @@ body:             |
     successors: %bb.3(0x60000000), %bb.2(0x20000000)
     liveins: $edi, $esi, $rdx
 
-    %2:gr64 = COPY $rdx
-    %1:gr32 = COPY $esi
-    %0:gr32 = COPY $edi
-    %3:gr16 = COPY %0.sub_16bit
-    TEST16rr %3, %3, implicit-def $eflags
+    %3:gr64 = COPY $rdx
+    %2:gr32 = COPY $esi
+    %1:gr32 = COPY $edi
+    %5:gr16 = COPY %1.sub_16bit
+    TEST16rr %5, %5, implicit-def $eflags
     JCC_1 %bb.2, 4, implicit $eflags
     JMP_1 %bb.3
 
   bb.3.entry:
     successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
 
-    %5:gr32 = AND32ri %1, 57920, implicit-def dead $eflags
-    %4:gr16 = COPY %5.sub_16bit
+    %0:gr32 = AND32ri %2, 123456, implicit-def dead $eflags
+    %4:gr16 = COPY %0.sub_16bit
     TEST16rr %4, %4, implicit-def $eflags
     JCC_1 %bb.2, 5, implicit $eflags
     JMP_1 %bb.1
@@ -469,7 +484,133 @@ body:             |
   bb.1.if.then:
     successors: %bb.2(0x80000000)
 
-    MOV16mr %2, 1, $noreg, 0, $noreg, %3 :: (store (s16) into %ir.2, align 4)
+    MOV32mr %3, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.2)
+
+  bb.2.if.end:
+    %6:gr32 = MOV32r0 implicit-def dead $eflags
+    %7:gr16 = COPY %6.sub_16bit
+    $ax = COPY %7
+    RET 0, $ax
+
+...
+---
+name:            erase_test16_sf
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   true
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr16, preferred-register: '' }
+  - { id: 1, class: gr32, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr64, preferred-register: '' }
+  - { id: 4, class: gr16, preferred-register: '' }
+  - { id: 5, class: gr32, preferred-register: '' }
+  - { id: 6, class: gr32, preferred-register: '' }
+  - { id: 7, class: gr16, preferred-register: '' }
+liveins:
+  - { reg: '$edi', virtual-reg: '%1' }
+  - { reg: '$esi', virtual-reg: '%2' }
+  - { reg: '$rdx', virtual-reg: '%3' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: erase_test16_sf
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x60000000), %bb.3(0x20000000)
+  ; CHECK-NEXT:   liveins: $edi, $esi, $rdx
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64 = COPY $rdx
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr32 = COPY $edi
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit
+  ; CHECK-NEXT:   TEST16rr [[COPY3]], [[COPY3]], implicit-def $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.3, 4, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 1234, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
+  ; CHECK-NEXT:   TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.3, 9, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.if.then:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY4]] :: (store (s16) into %ir.2, align 4)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.if.end:
+  ; CHECK-NEXT:   [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gr16 = COPY [[MOV32r0_]].sub_16bit
+  ; CHECK-NEXT:   $ax = COPY [[COPY5]]
+  ; CHECK-NEXT:   RET 0, $ax
+  bb.0.entry:
+    successors: %bb.3(0x60000000), %bb.2(0x20000000)
+    liveins: $edi, $esi, $rdx
+
+    %3:gr64 = COPY $rdx
+    %2:gr32 = COPY $esi
+    %1:gr32 = COPY $edi
+    %4:gr16 = COPY %1.sub_16bit
+    TEST16rr %4, %4, implicit-def $eflags
+    JCC_1 %bb.2, 4, implicit $eflags
+    JMP_1 %bb.3
+
+  bb.3.entry:
+    successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
+
+    %5:gr32 = AND32ri %2, 1234, implicit-def dead $eflags
+    %0:gr16 = COPY %5.sub_16bit
+    TEST16rr %0, %0, implicit-def $eflags
+    JCC_1 %bb.2, 9, implicit $eflags
+    JMP_1 %bb.1
+
+  bb.1.if.then:
+    successors: %bb.2(0x80000000)
+
+    MOV16mr %3, 1, $noreg, 0, $noreg, %0 :: (store (s16) into %ir.2, align 4)
 
   bb.2.if.end:
     %6:gr32 = MOV32r0 implicit-def dead $eflags


        


More information about the llvm-commits mailing list