[llvm] 5f48c14 - [AArch64][GlobalISel] Use ZExtValue for zext(xor) when invert tb(n)z

via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 5 20:15:50 PDT 2021


Author: guopeilin
Date: 2021-09-06T11:12:07+08:00
New Revision: 5f48c144c58f6d23e850a1978a6fe05887103b17

URL: https://github.com/llvm/llvm-project/commit/5f48c144c58f6d23e850a1978a6fe05887103b17
DIFF: https://github.com/llvm/llvm-project/commit/5f48c144c58f6d23e850a1978a6fe05887103b17.diff

LOG: [AArch64][GlobalISel] Use ZExtValue for zext(xor) when invert tb(n)z

Currently, we use the sign-extended value of the xor constant (via
getSExtValue) to decide whether to invert tbz or tbnz. However, for the
case zext(xor x, c) we should use getZExtValue rather than getSExtValue;
otherwise we generate exactly the opposite branch.
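
To see the problem concretely: the xor constant in the new test is i32 -1.
Read back with getSExtValue() it becomes 0xFFFFFFFFFFFFFFFF, but the value
that G_ZEXT actually produces is 0x00000000FFFFFFFF, so the two readings
disagree on bit 63 and the TB(N)Z fold picks the wrong branch polarity.
A minimal standalone sketch of the two readings (illustrative only, not
part of the patch; assumes only LLVM's ADT headers are available):

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    #include <cstdio>

    int main() {
      // The G_CONSTANT i32 -1 from the test case, as a 32-bit APInt.
      llvm::APInt C(32, -1, /*isSigned=*/true);

      // Sign-extended reading: all 64 bits set, so bit 63 looks set.
      uint64_t AsSExt = (uint64_t)C.getSExtValue(); // 0xFFFFFFFFFFFFFFFF

      // Zero-extended reading: matches the value G_ZEXT produces,
      // so bit 63 is correctly clear.
      uint64_t AsZExt = C.getZExtValue();           // 0x00000000FFFFFFFF

      std::printf("bit 63 via sext: %llu, via zext: %llu\n",
                  (unsigned long long)((AsSExt >> 63) & 1),
                  (unsigned long long)((AsZExt >> 63) & 1));
      return 0;
    }

With the sign-extended reading, bit 63 of the constant appears set, so the
fold inverts the branch; with the zero-extended reading it is clear and the
polarity is preserved, which the new dont_flip_eq_zext test pins down.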

Reviewed By: paquette

Differential Revision: https://reviews.llvm.org/D108755

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 88439c76919e4..62caf28a3f328 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1314,6 +1314,7 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                               MachineRegisterInfo &MRI) {
   assert(Reg.isValid() && "Expected valid register!");
+  bool HasZext = false;
   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
     unsigned Opc = MI->getOpcode();
 
@@ -1327,6 +1328,9 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
     // on the truncated x is the same as the bit number on x.
     if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
         Opc == TargetOpcode::G_TRUNC) {
+      if (Opc == TargetOpcode::G_ZEXT)
+        HasZext = true;
+
       Register NextReg = MI->getOperand(1).getReg();
       // Did we find something worth folding?
       if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
@@ -1355,8 +1359,12 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
         std::swap(ConstantReg, TestReg);
         VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
       }
-      if (VRegAndVal)
-        C = VRegAndVal->Value.getSExtValue();
+      if (VRegAndVal) {
+        if (HasZext)
+          C = VRegAndVal->Value.getZExtValue();
+        else
+          C = VRegAndVal->Value.getSExtValue();
+      }
       break;
     }
     case TargetOpcode::G_ASHR:

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
index 8e19ba41b2c0c..53ea6830fdc89 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
@@ -117,6 +117,38 @@ body:             |
     RET_ReallyLR
 ...
 ---
+name:            dont_flip_eq_zext
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_flip_eq_zext
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   TBNZX [[COPY1]], 63, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0(0x40000000), %bb.1(0x40000000)
+
+    %1:gpr(s32) = G_CONSTANT i32 0
+    %3:gpr(s32) = G_CONSTANT i32 -1
+    %4:gpr(s32) = G_XOR %1, %3
+    %5:gpr(s64) = G_ZEXT %4(s32)
+    %15:gpr(s64) = G_CONSTANT i64 0
+    %13:gpr(s32) = G_ICMP intpred(slt), %5(s64), %15
+    %7:gpr(s1) = G_TRUNC %13(s32)
+    G_BRCOND %7(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
 name:            dont_flip_ne
 alignment:       4
 legalized:       true
