[llvm] [AArch64][GISel] length-aware BITCAST between different-element-width fixed vectors (PR #179123)

Shakil Ahmed via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 5 09:31:45 PST 2026


https://github.com/ahmedshakill updated https://github.com/llvm/llvm-project/pull/179123

>From d7832cb176ef5ae378413068f3fcab46912ed5a0 Mon Sep 17 00:00:00 2001
From: Shakil Ahmed <shakil.000024 at gmail.com>
Date: Sun, 1 Feb 2026 23:15:05 +0600
Subject: [PATCH 1/3] [AArch64][GISel] length-aware BITCAST between
 different-element-width fixed vectors

---
 .../CodeGen/GlobalISel/GISelValueTracking.cpp | 54 ++++++++++++++++---
 .../compute-known-bits-bitcast-assertion.ll   | 38 +++++++++++++
 2 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 34692f0b4c4ee..e8cad9eafb7f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -234,6 +234,44 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
     Known = Known.trunc(BitWidth);
     break;
   }
+
+  case TargetOpcode::G_BITCAST: {
+    Register SrcReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    APInt SrcDemanded;
+
+    // Only handle fixed vectors
+    if (SrcTy.isFixedVector() && DstTy.isFixedVector()) {
+      unsigned SrcNumElts = SrcTy.getNumElements();
+      unsigned DstNumElts = DstTy.getNumElements();
+
+      if (SrcNumElts == DstNumElts) {
+        SrcDemanded = DemandedElts;
+      } else if (DstNumElts > SrcNumElts) {
+        unsigned Ratio = DstNumElts / SrcNumElts;
+        SrcDemanded = APInt(SrcNumElts, 0);
+        for (unsigned i = 0; i < SrcNumElts; ++i) {
+          if (DemandedElts.extractBits(Ratio, i * Ratio).getBoolValue())
+            SrcDemanded.setBit(i);
+        }
+      } else {
+        unsigned Ratio = SrcNumElts / DstNumElts;
+        SrcDemanded = APInt(SrcNumElts, 0);
+        for (unsigned i = 0; i < DstNumElts; ++i) {
+          if (DemandedElts[i]) {
+            SrcDemanded.setBits(i * Ratio, (i + 1) * Ratio);
+          }
+        }
+      }
+    } else {
+      SrcDemanded = SrcTy.isFixedVector()
+                        ? APInt::getAllOnes(SrcTy.getNumElements())
+                        : APInt(1, 1);
+    }
+    computeKnownBitsImpl(SrcReg, Known2, SrcDemanded, Depth + 1);
+    Known = Known2;
+    break;
+  }
   case TargetOpcode::COPY:
   case TargetOpcode::G_PHI:
   case TargetOpcode::PHI: {
@@ -258,12 +296,16 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
       // it's always defined to be 0 by tablegen.
       if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
           SrcTy.isValid()) {
-        // In case we're forwarding from a vector register to a non-vector
-        // register we need to update the demanded elements to reflect this
-        // before recursing.
-        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
-                                    ? APInt::getAllOnes(SrcTy.getNumElements())
-                                    : DemandedElts; // Known to be APInt(1, 1)
+        APInt NowDemandedElts;
+        if (!SrcTy.isFixedVector()) {
+          NowDemandedElts = APInt(1, 1);
+        } else if (DstTy.isFixedVector() &&
+                   SrcTy.getNumElements() == DstTy.getNumElements()) {
+          NowDemandedElts = DemandedElts;
+        } else {
+          NowDemandedElts = APInt::getAllOnes(SrcTy.getNumElements());
+        }
+
         // For COPYs we don't do anything, don't increase the depth.
         computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                              Depth + (Opcode != TargetOpcode::COPY));
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
new file mode 100644
index 0000000000000..ad3dc2b55c3ef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
@@ -0,0 +1,38 @@
+; REQUIRES: aarch64-registered-target
+; RUN: llc -mtriple=aarch64-unknown-linux-musl -global-isel -global-isel-abort=1 < %s | FileCheck %s
+
+define <2 x i8> @test_bitcast_assertion(<4 x i32> %vqaddq_v2.i.i, ptr %BS_VAR_0) {
+; CHECK-LABEL: test_bitcast_assertion:
+; CHECK:       sub sp, sp, #16
+; CHECK:       movi v[[ZERO_REG:[0-9]+]].2d, #0
+; CHECK:       mov [[PTR_TMP:x[0-9]+]], sp
+
+; CHECK:       .LBB0_1: // %for.cond
+; CHECK:       umov [[EXTRACTED:w[0-9]+]], v[[ZERO_REG]].h[0]
+; CHECK:       str q0, [sp]
+; CHECK:       umull [[IDX:x[0-9]+]], [[EXTRACTED]], w9
+; CHECK:       ldrh w[[VAL_REG:[0-9]+]], [[[PTR_TMP]], [[IDX]]]
+; CHECK:       stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #32]
+; CHECK:       stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #64]
+; CHECK:       fmov d[[RES_REG:[0-9]+]], x[[VAL_REG]]
+; CHECK:       stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #96]
+; CHECK:       mov v[[RES_REG]].d[1], xzr
+; CHECK:       stp q[[RES_REG]], q[[ZERO_REG]], [x0]
+; CHECK:       b .LBB0_1
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %0 = phi <64 x i16> [ %2, %for.cond ], [ zeroinitializer, %entry ]
+  %conv = extractelement <64 x i16> %0, i64 0
+  %vecext.i = extractelement <4 x i32> %vqaddq_v2.i.i, i16 %conv
+  %1 = and i32 %vecext.i, 65535
+  %conv1 = zext i32 %1 to i64
+  %vecinit16 = insertelement <16 x i64> zeroinitializer, i64 %conv1, i64 0
+  store <16 x i64> %vecinit16, ptr %BS_VAR_0, align 16
+  %2 = bitcast <16 x i64> zeroinitializer to <64 x i16>
+  br label %for.cond
+}
+
+

>From 68a96212ece541aaf7ddfff62bb970c6ccff7a4e Mon Sep 17 00:00:00 2001
From: Shakil Ahmed <shakil.000024 at gmail.com>
Date: Wed, 4 Feb 2026 23:45:04 +0600
Subject: [PATCH 2/3] format test, properly name variables

---
 .../compute-known-bits-bitcast-assertion.ll   | 53 ++++++++++---------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
index ad3dc2b55c3ef..37e4f206bb3cb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
@@ -1,38 +1,39 @@
-; REQUIRES: aarch64-registered-target
-; RUN: llc -mtriple=aarch64-unknown-linux-musl -global-isel -global-isel-abort=1 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-unknown-linux -global-isel -global-isel-abort=1 < %s | FileCheck %s
 
 define <2 x i8> @test_bitcast_assertion(<4 x i32> %vqaddq_v2.i.i, ptr %BS_VAR_0) {
 ; CHECK-LABEL: test_bitcast_assertion:
-; CHECK:       sub sp, sp, #16
-; CHECK:       movi v[[ZERO_REG:[0-9]+]].2d, #0
-; CHECK:       mov [[PTR_TMP:x[0-9]+]], sp
-
-; CHECK:       .LBB0_1: // %for.cond
-; CHECK:       umov [[EXTRACTED:w[0-9]+]], v[[ZERO_REG]].h[0]
-; CHECK:       str q0, [sp]
-; CHECK:       umull [[IDX:x[0-9]+]], [[EXTRACTED]], w9
-; CHECK:       ldrh w[[VAL_REG:[0-9]+]], [[[PTR_TMP]], [[IDX]]]
-; CHECK:       stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #32]
-; CHECK:       stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #64]
-; CHECK:       fmov d[[RES_REG:[0-9]+]], x[[VAL_REG]]
-; CHECK:       stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #96]
-; CHECK:       mov v[[RES_REG]].d[1], xzr
-; CHECK:       stp q[[RES_REG]], q[[ZERO_REG]], [x0]
-; CHECK:       b .LBB0_1
-
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    mov w9, #4 // =0x4
+; CHECK-NEXT:  .LBB0_1: // %for.cond
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    umov w10, v1.h[0]
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    umull x10, w10, w9
+; CHECK-NEXT:    ldrh w10, [x8, x10]
+; CHECK-NEXT:    stp q1, q1, [x0, #32]
+; CHECK-NEXT:    stp q1, q1, [x0, #64]
+; CHECK-NEXT:    fmov d2, x10
+; CHECK-NEXT:    stp q1, q1, [x0, #96]
+; CHECK-NEXT:    mov v2.d[1], xzr
+; CHECK-NEXT:    stp q2, q1, [x0]
+; CHECK-NEXT:    b .LBB0_1
 entry:
   br label %for.cond
 
 for.cond:                                         ; preds = %for.cond, %entry
-  %0 = phi <64 x i16> [ %2, %for.cond ], [ zeroinitializer, %entry ]
-  %conv = extractelement <64 x i16> %0, i64 0
+  %vec_wide = phi <64 x i16> [ %bitcasted_vec, %for.cond ], [ zeroinitializer, %entry ]
+  %conv = extractelement <64 x i16> %vec_wide, i64 0
   %vecext.i = extractelement <4 x i32> %vqaddq_v2.i.i, i16 %conv
-  %1 = and i32 %vecext.i, 65535
-  %conv1 = zext i32 %1 to i64
-  %vecinit16 = insertelement <16 x i64> zeroinitializer, i64 %conv1, i64 0
+  %masked_val = and i32 %vecext.i, 65535
+  %val_i64 = zext i32 %masked_val to i64
+  %vecinit16 = insertelement <16 x i64> zeroinitializer, i64 %val_i64, i64 0
   store <16 x i64> %vecinit16, ptr %BS_VAR_0, align 16
-  %2 = bitcast <16 x i64> zeroinitializer to <64 x i16>
+  %bitcasted_vec = bitcast <16 x i64> zeroinitializer to <64 x i16>
   br label %for.cond
 }
 
-

>From 2dbc622f3fb1cd5b74e9e64cf72149876be6c15f Mon Sep 17 00:00:00 2001
From: Shakil Ahmed <shakil.000024 at gmail.com>
Date: Thu, 5 Feb 2026 23:31:08 +0600
Subject: [PATCH 3/3] minimal fix for the bitcast vector-length assertion
 failure

---
 .../CodeGen/GlobalISel/GISelValueTracking.cpp | 38 -------------------
 .../compute-known-bits-bitcast-assertion.ll   |  1 +
 2 files changed, 1 insertion(+), 38 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index e8cad9eafb7f3..6d6bc87f77ad7 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -234,44 +234,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
     Known = Known.trunc(BitWidth);
     break;
   }
-
-  case TargetOpcode::G_BITCAST: {
-    Register SrcReg = MI.getOperand(1).getReg();
-    LLT SrcTy = MRI.getType(SrcReg);
-    APInt SrcDemanded;
-
-    // Only handle fixed vectors
-    if (SrcTy.isFixedVector() && DstTy.isFixedVector()) {
-      unsigned SrcNumElts = SrcTy.getNumElements();
-      unsigned DstNumElts = DstTy.getNumElements();
-
-      if (SrcNumElts == DstNumElts) {
-        SrcDemanded = DemandedElts;
-      } else if (DstNumElts > SrcNumElts) {
-        unsigned Ratio = DstNumElts / SrcNumElts;
-        SrcDemanded = APInt(SrcNumElts, 0);
-        for (unsigned i = 0; i < SrcNumElts; ++i) {
-          if (DemandedElts.extractBits(Ratio, i * Ratio).getBoolValue())
-            SrcDemanded.setBit(i);
-        }
-      } else {
-        unsigned Ratio = SrcNumElts / DstNumElts;
-        SrcDemanded = APInt(SrcNumElts, 0);
-        for (unsigned i = 0; i < DstNumElts; ++i) {
-          if (DemandedElts[i]) {
-            SrcDemanded.setBits(i * Ratio, (i + 1) * Ratio);
-          }
-        }
-      }
-    } else {
-      SrcDemanded = SrcTy.isFixedVector()
-                        ? APInt::getAllOnes(SrcTy.getNumElements())
-                        : APInt(1, 1);
-    }
-    computeKnownBitsImpl(SrcReg, Known2, SrcDemanded, Depth + 1);
-    Known = Known2;
-    break;
-  }
   case TargetOpcode::COPY:
   case TargetOpcode::G_PHI:
   case TargetOpcode::PHI: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
index 37e4f206bb3cb..2002eee0be690 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
@@ -13,6 +13,7 @@ define <2 x i8> @test_bitcast_assertion(<4 x i32> %vqaddq_v2.i.i, ptr %BS_VAR_0)
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    umov w10, v1.h[0]
 ; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    and x10, x10, #0x3
 ; CHECK-NEXT:    umull x10, w10, w9
 ; CHECK-NEXT:    ldrh w10, [x8, x10]
 ; CHECK-NEXT:    stp q1, q1, [x0, #32]



More information about the llvm-commits mailing list