[llvm] [AArch64][GISel] length-aware BITCAST between different-element-width fixed vectors (PR #179123)
Shakil Ahmed via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 5 09:31:45 PST 2026
https://github.com/ahmedshakill updated https://github.com/llvm/llvm-project/pull/179123
>From d7832cb176ef5ae378413068f3fcab46912ed5a0 Mon Sep 17 00:00:00 2001
From: Shakil Ahmed <shakil.000024 at gmail.com>
Date: Sun, 1 Feb 2026 23:15:05 +0600
Subject: [PATCH 1/3] [AArch64][GISel] length-aware BITCAST between
different-element-width fixed vectors
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 54 ++++++++++++++++---
.../compute-known-bits-bitcast-assertion.ll | 38 +++++++++++++
2 files changed, 86 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 34692f0b4c4ee..e8cad9eafb7f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -234,6 +234,44 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.trunc(BitWidth);
break;
}
+
+ case TargetOpcode::G_BITCAST: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ APInt SrcDemanded;
+
+ // Only handle fixed vectors
+ if (SrcTy.isFixedVector() && DstTy.isFixedVector()) {
+ unsigned SrcNumElts = SrcTy.getNumElements();
+ unsigned DstNumElts = DstTy.getNumElements();
+
+ if (SrcNumElts == DstNumElts) {
+ SrcDemanded = DemandedElts;
+ } else if (DstNumElts > SrcNumElts) {
+ unsigned Ratio = DstNumElts / SrcNumElts;
+ SrcDemanded = APInt(SrcNumElts, 0);
+ for (unsigned i = 0; i < SrcNumElts; ++i) {
+ if (DemandedElts.extractBits(Ratio, i * Ratio).getBoolValue())
+ SrcDemanded.setBit(i);
+ }
+ } else {
+ unsigned Ratio = SrcNumElts / DstNumElts;
+ SrcDemanded = APInt(SrcNumElts, 0);
+ for (unsigned i = 0; i < DstNumElts; ++i) {
+ if (DemandedElts[i]) {
+ SrcDemanded.setBits(i * Ratio, (i + 1) * Ratio);
+ }
+ }
+ }
+ } else {
+ SrcDemanded = SrcTy.isFixedVector()
+ ? APInt::getAllOnes(SrcTy.getNumElements())
+ : APInt(1, 1);
+ }
+ computeKnownBitsImpl(SrcReg, Known2, SrcDemanded, Depth + 1);
+ Known = Known2;
+ break;
+ }
case TargetOpcode::COPY:
case TargetOpcode::G_PHI:
case TargetOpcode::PHI: {
@@ -258,12 +296,16 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
// it's always defined to be 0 by tablegen.
if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
SrcTy.isValid()) {
- // In case we're forwarding from a vector register to a non-vector
- // register we need to update the demanded elements to reflect this
- // before recursing.
- APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
- ? APInt::getAllOnes(SrcTy.getNumElements())
- : DemandedElts; // Known to be APInt(1, 1)
+ APInt NowDemandedElts;
+ if (!SrcTy.isFixedVector()) {
+ NowDemandedElts = APInt(1, 1);
+ } else if (DstTy.isFixedVector() &&
+ SrcTy.getNumElements() == DstTy.getNumElements()) {
+ NowDemandedElts = DemandedElts;
+ } else {
+ NowDemandedElts = APInt::getAllOnes(SrcTy.getNumElements());
+ }
+
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
new file mode 100644
index 0000000000000..ad3dc2b55c3ef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
@@ -0,0 +1,38 @@
+; REQUIRES: aarch64-registered-target
+; RUN: llc -mtriple=aarch64-unknown-linux-musl -global-isel -global-isel-abort=1 < %s | FileCheck %s
+
+define <2 x i8> @test_bitcast_assertion(<4 x i32> %vqaddq_v2.i.i, ptr %BS_VAR_0) {
+; CHECK-LABEL: test_bitcast_assertion:
+; CHECK: sub sp, sp, #16
+; CHECK: movi v[[ZERO_REG:[0-9]+]].2d, #0
+; CHECK: mov [[PTR_TMP:x[0-9]+]], sp
+
+; CHECK: .LBB0_1: // %for.cond
+; CHECK: umov [[EXTRACTED:w[0-9]+]], v[[ZERO_REG]].h[0]
+; CHECK: str q0, [sp]
+; CHECK: umull [[IDX:x[0-9]+]], [[EXTRACTED]], w9
+; CHECK: ldrh w[[VAL_REG:[0-9]+]], [[[PTR_TMP]], [[IDX]]]
+; CHECK: stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #32]
+; CHECK: stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #64]
+; CHECK: fmov d[[RES_REG:[0-9]+]], x[[VAL_REG]]
+; CHECK: stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #96]
+; CHECK: mov v[[RES_REG]].d[1], xzr
+; CHECK: stp q[[RES_REG]], q[[ZERO_REG]], [x0]
+; CHECK: b .LBB0_1
+
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond, %entry
+ %0 = phi <64 x i16> [ %2, %for.cond ], [ zeroinitializer, %entry ]
+ %conv = extractelement <64 x i16> %0, i64 0
+ %vecext.i = extractelement <4 x i32> %vqaddq_v2.i.i, i16 %conv
+ %1 = and i32 %vecext.i, 65535
+ %conv1 = zext i32 %1 to i64
+ %vecinit16 = insertelement <16 x i64> zeroinitializer, i64 %conv1, i64 0
+ store <16 x i64> %vecinit16, ptr %BS_VAR_0, align 16
+ %2 = bitcast <16 x i64> zeroinitializer to <64 x i16>
+ br label %for.cond
+}
+
+
>From 68a96212ece541aaf7ddfff62bb970c6ccff7a4e Mon Sep 17 00:00:00 2001
From: Shakil Ahmed <shakil.000024 at gmail.com>
Date: Wed, 4 Feb 2026 23:45:04 +0600
Subject: [PATCH 2/3] format test, properly name variables
---
.../compute-known-bits-bitcast-assertion.ll | 53 ++++++++++---------
1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
index ad3dc2b55c3ef..37e4f206bb3cb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
@@ -1,38 +1,39 @@
-; REQUIRES: aarch64-registered-target
-; RUN: llc -mtriple=aarch64-unknown-linux-musl -global-isel -global-isel-abort=1 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-unknown-linux -global-isel -global-isel-abort=1 < %s | FileCheck %s
define <2 x i8> @test_bitcast_assertion(<4 x i32> %vqaddq_v2.i.i, ptr %BS_VAR_0) {
; CHECK-LABEL: test_bitcast_assertion:
-; CHECK: sub sp, sp, #16
-; CHECK: movi v[[ZERO_REG:[0-9]+]].2d, #0
-; CHECK: mov [[PTR_TMP:x[0-9]+]], sp
-
-; CHECK: .LBB0_1: // %for.cond
-; CHECK: umov [[EXTRACTED:w[0-9]+]], v[[ZERO_REG]].h[0]
-; CHECK: str q0, [sp]
-; CHECK: umull [[IDX:x[0-9]+]], [[EXTRACTED]], w9
-; CHECK: ldrh w[[VAL_REG:[0-9]+]], [[[PTR_TMP]], [[IDX]]]
-; CHECK: stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #32]
-; CHECK: stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #64]
-; CHECK: fmov d[[RES_REG:[0-9]+]], x[[VAL_REG]]
-; CHECK: stp q[[ZERO_REG]], q[[ZERO_REG]], [x0, #96]
-; CHECK: mov v[[RES_REG]].d[1], xzr
-; CHECK: stp q[[RES_REG]], q[[ZERO_REG]], [x0]
-; CHECK: b .LBB0_1
-
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: mov w9, #4 // =0x4
+; CHECK-NEXT: .LBB0_1: // %for.cond
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: umov w10, v1.h[0]
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: umull x10, w10, w9
+; CHECK-NEXT: ldrh w10, [x8, x10]
+; CHECK-NEXT: stp q1, q1, [x0, #32]
+; CHECK-NEXT: stp q1, q1, [x0, #64]
+; CHECK-NEXT: fmov d2, x10
+; CHECK-NEXT: stp q1, q1, [x0, #96]
+; CHECK-NEXT: mov v2.d[1], xzr
+; CHECK-NEXT: stp q2, q1, [x0]
+; CHECK-NEXT: b .LBB0_1
entry:
br label %for.cond
for.cond: ; preds = %for.cond, %entry
- %0 = phi <64 x i16> [ %2, %for.cond ], [ zeroinitializer, %entry ]
- %conv = extractelement <64 x i16> %0, i64 0
+ %vec_wide = phi <64 x i16> [ %bitcasted_vec, %for.cond ], [ zeroinitializer, %entry ]
+ %conv = extractelement <64 x i16> %vec_wide, i64 0
%vecext.i = extractelement <4 x i32> %vqaddq_v2.i.i, i16 %conv
- %1 = and i32 %vecext.i, 65535
- %conv1 = zext i32 %1 to i64
- %vecinit16 = insertelement <16 x i64> zeroinitializer, i64 %conv1, i64 0
+ %masked_val = and i32 %vecext.i, 65535
+ %val_i64 = zext i32 %masked_val to i64
+ %vecinit16 = insertelement <16 x i64> zeroinitializer, i64 %val_i64, i64 0
store <16 x i64> %vecinit16, ptr %BS_VAR_0, align 16
- %2 = bitcast <16 x i64> zeroinitializer to <64 x i16>
+ %bitcasted_vec = bitcast <16 x i64> zeroinitializer to <64 x i16>
br label %for.cond
}
-
>From 2dbc622f3fb1cd5b74e9e64cf72149876be6c15f Mon Sep 17 00:00:00 2001
From: Shakil Ahmed <shakil.000024 at gmail.com>
Date: Thu, 5 Feb 2026 23:31:08 +0600
Subject: [PATCH 3/3] minimal fix for the bitcast vector-length assertion
failure
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 38 -------------------
.../compute-known-bits-bitcast-assertion.ll | 1 +
2 files changed, 1 insertion(+), 38 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index e8cad9eafb7f3..6d6bc87f77ad7 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -234,44 +234,6 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.trunc(BitWidth);
break;
}
-
- case TargetOpcode::G_BITCAST: {
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- APInt SrcDemanded;
-
- // Only handle fixed vectors
- if (SrcTy.isFixedVector() && DstTy.isFixedVector()) {
- unsigned SrcNumElts = SrcTy.getNumElements();
- unsigned DstNumElts = DstTy.getNumElements();
-
- if (SrcNumElts == DstNumElts) {
- SrcDemanded = DemandedElts;
- } else if (DstNumElts > SrcNumElts) {
- unsigned Ratio = DstNumElts / SrcNumElts;
- SrcDemanded = APInt(SrcNumElts, 0);
- for (unsigned i = 0; i < SrcNumElts; ++i) {
- if (DemandedElts.extractBits(Ratio, i * Ratio).getBoolValue())
- SrcDemanded.setBit(i);
- }
- } else {
- unsigned Ratio = SrcNumElts / DstNumElts;
- SrcDemanded = APInt(SrcNumElts, 0);
- for (unsigned i = 0; i < DstNumElts; ++i) {
- if (DemandedElts[i]) {
- SrcDemanded.setBits(i * Ratio, (i + 1) * Ratio);
- }
- }
- }
- } else {
- SrcDemanded = SrcTy.isFixedVector()
- ? APInt::getAllOnes(SrcTy.getNumElements())
- : APInt(1, 1);
- }
- computeKnownBitsImpl(SrcReg, Known2, SrcDemanded, Depth + 1);
- Known = Known2;
- break;
- }
case TargetOpcode::COPY:
case TargetOpcode::G_PHI:
case TargetOpcode::PHI: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
index 37e4f206bb3cb..2002eee0be690 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/compute-known-bits-bitcast-assertion.ll
@@ -13,6 +13,7 @@ define <2 x i8> @test_bitcast_assertion(<4 x i32> %vqaddq_v2.i.i, ptr %BS_VAR_0)
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: umov w10, v1.h[0]
; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: and x10, x10, #0x3
; CHECK-NEXT: umull x10, w10, w9
; CHECK-NEXT: ldrh w10, [x8, x10]
; CHECK-NEXT: stp q1, q1, [x0, #32]
More information about the llvm-commits mailing list