[llvm-branch-commits] [llvm] 81f8bda - [AArch64][GlobalISel] Do not skip zext in getTestBitReg. (#177991)

Tue Feb 3 01:17:10 PST 2026

Author: David Green
Date: 2026-02-03T09:17:00Z
New Revision: 81f8bda2ce1d69f7c3f5b0daf60f22f4e09995a8

URL: https://github.com/llvm/llvm-project/commit/81f8bda2ce1d69f7c3f5b0daf60f22f4e09995a8
DIFF: https://github.com/llvm/llvm-project/commit/81f8bda2ce1d69f7c3f5b0daf60f22f4e09995a8.diff

LOG: [AArch64][GlobalISel] Do not skip zext in getTestBitReg. (#177991)

When attempting to lower a branch to tbz, we can look through a zext
whose zero-extension is then not accounted for anywhere else. In the
attached test this produces a tbz that reads directly from a vector
extract, so the extracted value is never properly zero-extended. This
patch fixes that by only looking through a G_ZEXT if the bit being
tested is in the low part of the value (i.e. below the bit width of the
pre-extension source), lining up the code with the comment.

Fixes #173895

(cherry picked from commit 0321f3eeee5cceddc2541046ee155863f5f59585)

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
    llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
    llvm/test/CodeGen/AArch64/aarch64-tbz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index f9db39e5f8622..ac659f100a863 100644

--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1410,7 +1410,8 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
       break;
 
-    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+    // (tbz (any_ext x), b) -> (tbz x, b) and
+    // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
     //
     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
     // on the truncated x is the same as the bit number on x.
@@ -1423,6 +1424,9 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
       // Did we find something worth folding?
       if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
         break;
+      TypeSize InSize = MRI.getType(NextReg).getSizeInBits();
+      if (Bit >= InSize)
+        break;
 
       // NextReg is worth folding. Keep looking.
       Reg = NextReg;

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
index ed24193b58267..64d09ea682e4a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
@@ -134,9 +134,10 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
-  ; CHECK-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
-  ; CHECK-NEXT:   TBNZX [[COPY1]], 63, %bb.1
+  ; CHECK-NEXT:   [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr $wzr, [[COPY]]
+  ; CHECK-NEXT:   [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ORNWrr]], 0
+  ; CHECK-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+  ; CHECK-NEXT:   TBNZX [[SUBREG_TO_REG]], 63, %bb.1
   ; CHECK-NEXT:   B %bb.0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
index 2f8409f9fd3e3..92a37ebf87d9a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
@@ -53,10 +53,10 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $w0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   %reg:gpr32all = COPY $w0
-  ; CHECK-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
-  ; CHECK-NEXT:   TBZX [[COPY]], 33, %bb.1
+  ; CHECK-NEXT:   %reg:gpr32 = COPY $w0
+  ; CHECK-NEXT:   [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg, 0
+  ; CHECK-NEXT:   %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+  ; CHECK-NEXT:   TBZX %zext, 33, %bb.1
   ; CHECK-NEXT:   B %bb.0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
@@ -90,9 +90,9 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   %reg:gpr32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
-  ; CHECK-NEXT:   TBZX [[COPY]], 33, %bb.1
+  ; CHECK-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+  ; CHECK-NEXT:   %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15
+  ; CHECK-NEXT:   TBZX %zext, 33, %bb.1
   ; CHECK-NEXT:   B %bb.0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-tbz.ll b/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
index 5d0b3f5ef4f02..3a13bc7a7f677 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
@@ -223,9 +223,10 @@ define i32 @tbzfromextract(<8 x i8> %b) {
 ;
 ; CHECK-GIO0-LABEL: tbzfromextract:
 ; CHECK-GIO0:       // %bb.0:
-; CHECK-GIO0-NEXT:    // kill: def $b0 killed $b0 killed $d0
-; CHECK-GIO0-NEXT:    // kill: def $s0 killed $b0
-; CHECK-GIO0-NEXT:    fmov w8, s0
+; CHECK-GIO0-NEXT:    fmov d1, d0
+; CHECK-GIO0-NEXT:    // implicit-def: $q0
+; CHECK-GIO0-NEXT:    fmov d0, d1
+; CHECK-GIO0-NEXT:    umov w8, v0.b[0]
 ; CHECK-GIO0-NEXT:    tbnz w8, #31, .LBB4_2
 ; CHECK-GIO0-NEXT:    b .LBB4_1
 ; CHECK-GIO0-NEXT:  .LBB4_1: // %land.rhs
@@ -246,8 +247,8 @@ land.end:
     ret i32 0
 }
 
-
 declare void @foo(i64, i64)
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-GI: {{.*}}
 ; CHECK-SD: {{.*}}